path: root/lib/CodeGen/SelectionDAG
author     Dimitry Andric <dim@FreeBSD.org>  2017-12-18 20:10:56 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2017-12-18 20:10:56 +0000
commit     044eb2f6afba375a914ac9d8024f8f5142bb912e (patch)
tree       1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/CodeGen/SelectionDAG
parent     eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff)
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt                  |    2
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp                 | 2497
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp                    |   30
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp        |   12
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp                |   25
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp                 |  543
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp        |   99
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp               |   62
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h                 |   27
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp        |    3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp           |   62
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp         |  252
-rw-r--r--  lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp       |    4
-rw-r--r--  lib/CodeGen/SelectionDAG/SDNodeDbgValue.h                |   85
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp             |    4
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp           |  215
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp          |   57
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp             |   11
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp                |  650
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp |   13
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp         |  664
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h           |  219
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp          |   76
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp            |  157
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp         |    4
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp      |    5
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp          |   39
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.h            |   16
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp              |  330
29 files changed, 3834 insertions, 2329 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index ae9c5adb03979..fd1e5e2cfc567 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -24,7 +24,7 @@ add_llvm_library(LLVMSelectionDAG
SelectionDAGTargetInfo.cpp
StatepointLowering.cpp
TargetLowering.cpp
-
+
DEPENDS
intrinsics_gen
)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 432c86dd6f1e1..f97732c1c49d0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,4 +1,4 @@
-//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
+//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,32 +16,64 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "dagcombine"
@@ -53,43 +85,41 @@ STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
-namespace {
- static cl::opt<bool>
- CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
- cl::desc("Enable DAG combiner's use of IR alias analysis"));
+static cl::opt<bool>
+CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
+ cl::desc("Enable DAG combiner's use of IR alias analysis"));
- static cl::opt<bool>
- UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
- cl::desc("Enable DAG combiner's use of TBAA"));
+static cl::opt<bool>
+UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
+ cl::desc("Enable DAG combiner's use of TBAA"));
#ifndef NDEBUG
- static cl::opt<std::string>
- CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
- cl::desc("Only use DAG-combiner alias analysis in this"
- " function"));
+static cl::opt<std::string>
+CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
+ cl::desc("Only use DAG-combiner alias analysis in this"
+ " function"));
#endif
- /// Hidden option to stress test load slicing, i.e., when this option
- /// is enabled, load slicing bypasses most of its profitability guards.
- static cl::opt<bool>
- StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
- cl::desc("Bypass the profitability model of load "
- "slicing"),
- cl::init(false));
+/// Hidden option to stress test load slicing, i.e., when this option
+/// is enabled, load slicing bypasses most of its profitability guards.
+static cl::opt<bool>
+StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
+ cl::desc("Bypass the profitability model of load slicing"),
+ cl::init(false));
- static cl::opt<bool>
- MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
- cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<bool>
+ MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner may split indexing from loads"));
-//------------------------------ DAGCombiner ---------------------------------//
+namespace {
class DAGCombiner {
SelectionDAG &DAG;
const TargetLowering &TLI;
CombineLevel Level;
CodeGenOpt::Level OptLevel;
- bool LegalOperations;
- bool LegalTypes;
+ bool LegalOperations = false;
+ bool LegalTypes = false;
bool ForCodeSize;
/// \brief Worklist of all of the nodes that need to be simplified.
@@ -128,6 +158,19 @@ namespace {
SDValue visit(SDNode *N);
public:
+ DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
+ : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
+ OptLevel(OL), AA(AA) {
+ ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
+
+ MaximumLegalStoreInBits = 0;
+ for (MVT VT : MVT::all_valuetypes())
+ if (EVT(VT).isSimple() && VT != MVT::Other &&
+ TLI.isTypeLegal(EVT(VT)) &&
+ VT.getSizeInBits() >= MaximumLegalStoreInBits)
+ MaximumLegalStoreInBits = VT.getSizeInBits();
+ }
+
/// Add to the worklist making sure its instance is at the back (next to be
/// processed.)
void AddToWorklist(SDNode *N) {
@@ -285,7 +328,7 @@ namespace {
SDValue visitSIGN_EXTEND(SDNode *N);
SDValue visitZERO_EXTEND(SDNode *N);
SDValue visitANY_EXTEND(SDNode *N);
- SDValue visitAssertZext(SDNode *N);
+ SDValue visitAssertExt(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
@@ -348,6 +391,7 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
SDValue foldSelectOfConstants(SDNode *N);
+ SDValue foldVSelectOfConstants(SDNode *N);
SDValue foldBinOpIntoSelect(SDNode *BO);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
@@ -371,6 +415,7 @@ namespace {
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
SDValue CombineExtLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
+ SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
@@ -400,14 +445,11 @@ namespace {
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
SDValue reduceBuildVecToShuffle(SDNode *N);
- SDValue reduceBuildVecToTrunc(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
SDValue VecIn2, unsigned LeftIdx);
SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
- SDValue GetDemandedBits(SDValue V, const APInt &Mask);
-
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void GatherAllAliases(SDNode *N, SDValue OriginalChain,
@@ -434,12 +476,14 @@ namespace {
/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
- MemOpLink(LSBaseSDNode *N, int64_t Offset)
- : MemNode(N), OffsetFromBase(Offset) {}
// Ptr to the mem node.
LSBaseSDNode *MemNode;
+
// Offset from the base ptr.
int64_t OffsetFromBase;
+
+ MemOpLink(LSBaseSDNode *N, int64_t Offset)
+ : MemNode(N), OffsetFromBase(Offset) {}
};
/// This is a helper function for visitMUL to check the profitability
@@ -450,38 +494,49 @@ namespace {
SDValue &AddNode,
SDValue &ConstNode);
-
/// This is a helper function for visitAND and visitZERO_EXTEND. Returns
/// true if the (and (load x) c) pattern matches an extload. ExtVT returns
- /// the type of the loaded value to be extended. LoadedVT returns the type
- /// of the original loaded value. NarrowLoad returns whether the load would
- /// need to be narrowed in order to match.
+ /// the type of the loaded value to be extended.
bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
- EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
- bool &NarrowLoad);
+ EVT LoadResultTy, EVT &ExtVT);
+
+ /// Helper function to calculate whether the given Load can have its
+ /// width reduced to ExtVT.
+ bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
+ EVT &ExtVT, unsigned ShAmt = 0);
+
+ /// Used by BackwardsPropagateMask to find suitable loads.
+ bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
+                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+                           ConstantSDNode *Mask, SDNode *&NodeToMask);
+ /// Attempt to propagate a given AND node back to load leaves so that they
+ /// can be combined into narrow loads.
+ bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
/// Helper function for MergeConsecutiveStores which merges the
/// component store chains.
SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores);
- /// This is a helper function for MergeConsecutiveStores. When the source
- /// elements of the consecutive stores are all constants or all extracted
- /// vector elements, try to merge them into one larger store.
- /// \return True if a merged store was created.
+ /// This is a helper function for MergeConsecutiveStores. When the
+ /// source elements of the consecutive stores are all constants or
+ /// all extracted vector elements, try to merge them into one
+ /// larger store introducing bitcasts if necessary. \return True
+ /// if a merged store was created.
bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
EVT MemVT, unsigned NumStores,
bool IsConstantSrc, bool UseVector,
bool UseTrunc);
- /// This is a helper function for MergeConsecutiveStores.
- /// Stores that may be merged are placed in StoreNodes.
+ /// This is a helper function for MergeConsecutiveStores. Stores
+ /// that potentially may be merged with St are placed in
+ /// StoreNodes.
void getStoreMergeCandidates(StoreSDNode *St,
SmallVectorImpl<MemOpLink> &StoreNodes);
/// Helper function for MergeConsecutiveStores. Checks if
- /// Candidate stores have indirect dependency through their
- /// operands. \return True if safe to merge
+ /// candidate stores have indirect dependency through their
+ /// operands. \return True if safe to merge.
bool checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
@@ -500,19 +555,6 @@ namespace {
SDValue distributeTruncateThroughAnd(SDNode *N);
public:
- DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
- : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
- OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) {
- ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
-
- MaximumLegalStoreInBits = 0;
- for (MVT VT : MVT::all_valuetypes())
- if (EVT(VT).isSimple() && VT != MVT::Other &&
- TLI.isTypeLegal(EVT(VT)) &&
- VT.getSizeInBits() >= MaximumLegalStoreInBits)
- MaximumLegalStoreInBits = VT.getSizeInBits();
- }
-
/// Runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
@@ -541,14 +583,12 @@ namespace {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
};
-}
-
-namespace {
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
DAGCombiner &DC;
+
public:
explicit WorklistRemover(DAGCombiner &dc)
: SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
@@ -557,7 +597,8 @@ public:
DC.removeFromWorklist(N);
}
};
-}
+
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// TargetLowering::DAGCombinerInfo implementation
@@ -577,7 +618,6 @@ CombineTo(SDNode *N, SDValue Res, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}
-
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
@@ -873,6 +913,56 @@ static bool isAnyConstantBuildVector(const SDNode *N) {
ISD::isBuildVectorOfConstantFPSDNodes(N);
}
+// Attempt to match a unary predicate against a scalar/splat constant or
+// every element of a constant BUILD_VECTOR.
+static bool matchUnaryPredicate(SDValue Op,
+ std::function<bool(ConstantSDNode *)> Match) {
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
+ return Match(Cst);
+
+ if (ISD::BUILD_VECTOR != Op.getOpcode())
+ return false;
+
+ EVT SVT = Op.getValueType().getScalarType();
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
+ if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
+ return false;
+ }
+ return true;
+}
+
+// Attempt to match a binary predicate against a pair of scalar/splat constants
+// or every element of a pair of constant BUILD_VECTORs.
+static bool matchBinaryPredicate(
+ SDValue LHS, SDValue RHS,
+ std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
+ if (LHS.getValueType() != RHS.getValueType())
+ return false;
+
+ if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
+ if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
+ return Match(LHSCst, RHSCst);
+
+ if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
+ ISD::BUILD_VECTOR != RHS.getOpcode())
+ return false;
+
+ EVT SVT = LHS.getValueType().getScalarType();
+ for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
+ auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
+ auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
+ if (!LHSCst || !RHSCst)
+ return false;
+ if (LHSCst->getValueType(0) != SVT ||
+ LHSCst->getValueType(0) != RHSCst->getValueType(0))
+ return false;
+ if (!Match(LHSCst, RHSCst))
+ return false;
+ }
+ return true;
+}
+
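
Illustration, not part of the diff: the contract of matchUnaryPredicate, modeled standalone with plain ints standing in for ConstantSDNode lanes (an assumption, not LLVM API). One predicate is applied to a scalar or to every lane of a vector constant, which is how the new shift folds later in this diff use it.

    #include <cassert>
    #include <functional>
    #include <vector>

    // matchAll is a hypothetical analog of matchUnaryPredicate: the fold
    // succeeds only if the predicate holds for every constant lane.
    static bool matchAll(const std::vector<int> &Lanes,
                         const std::function<bool(int)> &Match) {
      for (int L : Lanes)
        if (!Match(L))
          return false;
      return true;
    }

    int main() {
      unsigned OpSizeInBits = 32;
      auto TooBig = [OpSizeInBits](int Amt) {
        return (unsigned)Amt >= OpSizeInBits;
      };
      assert(matchAll({33, 40, 99}, TooBig)); // every lane oversized -> fold
      assert(!matchAll({33, 7}, TooBig));     // mixed lanes -> no fold
      return 0;
    }
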
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1) {
EVT VT = N0.getValueType();
@@ -1123,10 +1213,10 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
Replace0 &= !N0->hasOneUse();
Replace1 &= (N0 != N1) && !N1->hasOneUse();
- // Combine Op here so it is presreved past replacements.
+ // Combine Op here so it is preserved past replacements.
CombineTo(Op.getNode(), RV);
- // If operands have a use ordering, make sur we deal with
+ // If operands have a use ordering, make sure we deal with
// predecessor first.
if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
std::swap(N0, N1);
@@ -1473,7 +1563,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
- case ISD::AssertZext: return visitAssertZext(N);
+ case ISD::AssertSext:
+ case ISD::AssertZext: return visitAssertExt(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
@@ -1572,15 +1663,15 @@ SDValue DAGCombiner::combine(SDNode *N) {
}
}
- // If N is a commutative binary node, try commuting it to enable more
- // sdisel CSE.
+  // If N is a commutative binary node, try to eliminate it if the commuted
+ // version is already present in the DAG.
if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// Constant operands are canonicalized to RHS.
- if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
+ if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
SDValue Ops[] = {N1, N0};
SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
N->getFlags());
@@ -1632,7 +1723,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// Check each of the operands.
for (const SDValue &Op : TF->op_values()) {
-
switch (Op.getOpcode()) {
case ISD::EntryToken:
// Entry tokens don't need to be added to the list. They are
@@ -1907,6 +1997,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
}
}
+
+ // Undo the add -> or combine to merge constant offsets from a frame index.
+ if (N0.getOpcode() == ISD::OR &&
+ isa<FrameIndexSDNode>(N0.getOperand(0)) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
+ SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
+ }
}
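
Illustration, not part of the diff: the identity this combine relies on. When two values share no set bits (haveNoCommonBitsSet), OR and ADD agree, so the (or FrameIndex, c1) can be rewritten back into an add and the constant offsets reassociated.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t fi = 0xF0, c1 = 0x0C;              // disjoint bit patterns
      assert((fi & c1) == 0);
      assert((fi | c1) == fi + c1);               // or == add on disjoint bits
      uint32_t c2 = 0x100;
      assert(((fi | c1) + c2) == fi + (c2 + c1)); // the reassociation above
      return 0;
    }
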
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -2064,7 +2163,8 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
}
// (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
- if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)))
+ if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
+ N1.getResNo() == 0)
return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
N0, N1.getOperand(0), N1.getOperand(2));
@@ -2537,6 +2637,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
+ assert((!N0IsConst ||
+ ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
+ "Splat APInt should be element width");
+ assert((!N1IsConst ||
+ ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
+ "Splat APInt should be element width");
} else {
N0IsConst = isa<ConstantSDNode>(N0);
if (N0IsConst) {
@@ -2562,12 +2668,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// fold (mul x, 0) -> 0
if (N1IsConst && ConstValue1.isNullValue())
return N1;
- // We require a splat of the entire scalar bit width for non-contiguous
- // bit patterns.
- bool IsFullSplat =
- ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
// fold (mul x, 1) -> x
- if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat)
+ if (N1IsConst && ConstValue1.isOneValue())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -2580,16 +2682,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
DAG.getConstant(0, DL, VT), N0);
}
// fold (mul x, (1 << c)) -> x << c
- if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
- IsFullSplat) {
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1) &&
+ (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
SDLoc DL(N);
- return DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(ConstValue1.logBase2(), DL,
- getShiftAmountTy(N0.getValueType())));
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
}
// fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
- if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
- IsFullSplat) {
+ if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
unsigned Log2Val = (-ConstValue1).logBase2();
SDLoc DL(N);
// FIXME: If the input is something that is easily negated (e.g. a
@@ -2835,7 +2941,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If integer divide is expensive and we satisfy the requirements, emit an
// alternate sequence. Targets may check function attributes for size/speed
// trade-offs.
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildSDIV(N))
return Op;
@@ -2906,7 +3012,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
}
// fold (udiv x, c) -> alternate
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue Op = BuildUDIV(N))
return Op;
@@ -2965,7 +3071,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
}
}
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// If X/C can be simplified by the division-by-constant logic, lower
// X%C to the equivalent of X-X/C*C.
@@ -3003,19 +3109,26 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ if (VT.isVector()) {
+ // fold (mulhs x, 0) -> 0
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ }
+
// fold (mulhs x, 0) -> 0
if (isNullConstant(N1))
return N1;
// fold (mulhs x, 1) -> (sra x, size(x)-1)
- if (isOneConstant(N1)) {
- SDLoc DL(N);
+ if (isOneConstant(N1))
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
- }
+
// fold (mulhs x, undef) -> 0
if (N0.isUndef() || N1.isUndef())
- return DAG.getConstant(0, SDLoc(N), VT);
+ return DAG.getConstant(0, DL, VT);
// If the type twice as wide is legal, transform the mulhs to a wider multiply
// plus a shift.
@@ -3043,6 +3156,14 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ if (VT.isVector()) {
+ // fold (mulhu x, 0) -> 0
+ if (ISD::isBuildVectorAllZeros(N1.getNode()))
+ return N1;
+ if (ISD::isBuildVectorAllZeros(N0.getNode()))
+ return N0;
+ }
+
// fold (mulhu x, 0) -> 0
if (isNullConstant(N1))
return N1;
@@ -3216,7 +3337,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- // fold (add c1, c2) -> c1+c2
+ // fold operation with constant operands.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
if (N0C && N1C)
@@ -3599,22 +3720,20 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
}
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
- EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
- bool &NarrowLoad) {
- uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
-
- if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits))
+ EVT LoadResultTy, EVT &ExtVT) {
+ if (!AndC->getAPIntValue().isMask())
return false;
+ unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+
ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
- LoadedVT = LoadN->getMemoryVT();
+ EVT LoadedVT = LoadN->getMemoryVT();
if (ExtVT == LoadedVT &&
(!LegalOperations ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
// ZEXTLOAD will match without needing to change the size of the value being
// loaded.
- NarrowLoad = false;
return true;
}
@@ -3634,10 +3753,185 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
return false;
- NarrowLoad = true;
return true;
}
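
Illustration, not part of the diff: the reworked mask test in plain C++ (assumes GCC/Clang __builtin_ctz). APInt::isMask() accepts only a contiguous run of low one bits, and countTrailingOnes() then yields the narrow load width directly, replacing the old getActiveBits/isMask(ActiveBits) pair.

    #include <cassert>
    #include <cstdint>

    static bool isLowBitMask(uint32_t V) {   // analog of APInt::isMask()
      return V != 0 && ((V + 1) & V) == 0;
    }

    int main() {
      uint32_t AndC = 0x00FF;
      assert(isLowBitMask(AndC));            // contiguous low bits
      assert(__builtin_ctz(~AndC) == 8);     // countTrailingOnes -> i8 ExtVT
      assert(!isLowBitMask(0x0F0F));         // gaps -> no narrowing
      return 0;
    }
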
+bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
+ EVT &ExtVT, unsigned ShAmt) {
+ // Don't transform one with multiple uses, this would require adding a new
+ // load.
+ if (!SDValue(LoadN, 0).hasOneUse())
+ return false;
+
+ if (LegalOperations &&
+ !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT))
+ return false;
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return false;
+
+ // Don't change the width of a volatile load.
+ if (LoadN->isVolatile())
+ return false;
+
+ // Verify that we are actually reducing a load width here.
+ if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits())
+ return false;
+
+ // For the transform to be legal, the load must produce only two values
+ // (the value loaded and the chain). Don't transform a pre-increment
+ // load, for example, which produces an extra value. Otherwise the
+ // transformation is not equivalent, and the downstream logic to replace
+ // uses gets things wrong.
+ if (LoadN->getNumValues() > 2)
+ return false;
+
+ // If the load that we're shrinking is an extload and we're not just
+ // discarding the extension we can't simply shrink the load. Bail.
+ // TODO: It would be possible to merge the extensions in some cases.
+ if (LoadN->getExtensionType() != ISD::NON_EXTLOAD &&
+ LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
+ return false;
+
+ if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT))
+ return false;
+
+ // It's not possible to generate a constant of extended or untyped type.
+ EVT PtrType = LoadN->getOperand(1).getValueType();
+ if (PtrType == MVT::Untyped || PtrType.isExtended())
+ return false;
+
+ return true;
+}
+
+bool DAGCombiner::SearchForAndLoads(SDNode *N,
+ SmallPtrSetImpl<LoadSDNode*> &Loads,
+ SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+ ConstantSDNode *Mask,
+ SDNode *&NodeToMask) {
+ // Recursively search for the operands, looking for loads which can be
+ // narrowed.
+ for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
+ SDValue Op = N->getOperand(i);
+
+ if (Op.getValueType().isVector())
+ return false;
+
+ // Some constants may need fixing up later if they are too large.
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
+ (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
+ NodesWithConsts.insert(N);
+ continue;
+ }
+
+ if (!Op.hasOneUse())
+ return false;
+
+ switch(Op.getOpcode()) {
+ case ISD::LOAD: {
+ auto *Load = cast<LoadSDNode>(Op);
+ EVT ExtVT;
+ if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
+ isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) {
+ // Only add this load if we can make it more narrow.
+ if (ExtVT.bitsLT(Load->getMemoryVT()))
+ Loads.insert(Load);
+ continue;
+ }
+ return false;
+ }
+ case ISD::ZERO_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::AssertZext: {
+ unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
+ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ EVT VT = Op.getOpcode() == ISD::AssertZext ?
+ cast<VTSDNode>(Op.getOperand(1))->getVT() :
+ Op.getOperand(0).getValueType();
+
+ // We can accept extending nodes if the mask is wider or an equal
+ // width to the original type.
+ if (ExtVT.bitsGE(VT))
+ continue;
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::AND:
+ if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
+ NodeToMask))
+ return false;
+ continue;
+ }
+
+    // Allow one node which will be masked along with any loads found.
+ if (NodeToMask)
+ return false;
+ NodeToMask = Op.getNode();
+ }
+ return true;
+}
+
+bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
+ auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Mask)
+ return false;
+
+ if (!Mask->getAPIntValue().isMask())
+ return false;
+
+ // No need to do anything if the and directly uses a load.
+ if (isa<LoadSDNode>(N->getOperand(0)))
+ return false;
+
+ SmallPtrSet<LoadSDNode*, 8> Loads;
+ SmallPtrSet<SDNode*, 2> NodesWithConsts;
+ SDNode *FixupNode = nullptr;
+ if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
+ if (Loads.size() == 0)
+ return false;
+
+ SDValue MaskOp = N->getOperand(1);
+
+ // If it exists, fixup the single node we allow in the tree that needs
+ // masking.
+ if (FixupNode) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
+ FixupNode->getValueType(0),
+ SDValue(FixupNode, 0), MaskOp);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0),
+ MaskOp);
+ }
+
+ // Narrow any constants that need it.
+ for (auto *LogicN : NodesWithConsts) {
+ auto *C = cast<ConstantSDNode>(LogicN->getOperand(1));
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0),
+ SDValue(C, 0), MaskOp);
+ DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And);
+ }
+
+ // Create narrow loads.
+ for (auto *Load : Loads) {
+ SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
+ SDValue(Load, 0), MaskOp);
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
+ DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp);
+ SDValue NewLoad = ReduceLoadWidth(And.getNode());
+ assert(NewLoad &&
+ "Shouldn't be masking the load if it can't be narrowed");
+ CombineTo(Load, NewLoad, NewLoad.getValue(1));
+ }
+ DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
+ return true;
+ }
+ return false;
+}
+
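
Illustration, not part of the diff: the algebra BackwardsPropagateMask depends on. A mask AND distributes over OR and XOR, so it can be pushed from the root AND down to the load leaves, with oversized constants narrowed along the way (the NodesWithConsts fixups).

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEF, y = 0x12345678, m = 0xFF;
      assert(((x | y) & m) == ((x & m) | (y & m)));
      assert(((x ^ y) & m) == ((x & m) ^ (y & m)));
      uint32_t c = 0xABCD;                          // wider than the mask
      assert(((x | c) & m) == ((x & m) | (c & m))); // narrow c to c & m
      return 0;
    }
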
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3829,55 +4123,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
(N0.getOpcode() == ISD::ANY_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::LOAD))) {
- bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
- LoadSDNode *LN0 = HasAnyExt
- ? cast<LoadSDNode>(N0.getOperand(0))
- : cast<LoadSDNode>(N0);
- if (LN0->getExtensionType() != ISD::SEXTLOAD &&
- LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
- auto NarrowLoad = false;
- EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
- EVT ExtVT, LoadedVT;
- if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
- NarrowLoad)) {
- if (!NarrowLoad) {
- SDValue NewLoad =
- DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
- LN0->getChain(), LN0->getBasePtr(), ExtVT,
- LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(LN0, NewLoad, NewLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- } else {
- EVT PtrType = LN0->getOperand(1).getValueType();
-
- unsigned Alignment = LN0->getAlignment();
- SDValue NewPtr = LN0->getBasePtr();
-
- // For big endian targets, we need to add an offset to the pointer
- // to load the correct bytes. For little endian systems, we merely
- // need to read fewer bytes from the same pointer.
- if (DAG.getDataLayout().isBigEndian()) {
- unsigned LVTStoreBytes = LoadedVT.getStoreSize();
- unsigned EVTStoreBytes = ExtVT.getStoreSize();
- unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
- SDLoc DL(LN0);
- NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
- NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
- Alignment = MinAlign(Alignment, PtrOff);
- }
+ if (SDValue Res = ReduceLoadWidth(N)) {
+ LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
+ ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
- AddToWorklist(NewPtr.getNode());
+ AddToWorklist(N);
+ CombineTo(LN0, Res, Res.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
- SDValue Load = DAG.getExtLoad(
- ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr,
- LN0->getPointerInfo(), ExtVT, Alignment,
- LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
- AddToWorklist(N);
- CombineTo(LN0, Load, Load.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
+ if (Level >= AfterLegalizeTypes) {
+ // Attempt to propagate the AND back up to the leaves which, if they're
+ // loads, can be combined to narrow loads and the AND node can be removed.
+ // Perform after legalization so that extend nodes will already be
+ // combined into the loads.
+ if (BackwardsPropagateMask(N, DAG)) {
+ return SDValue(N, 0);
}
}
@@ -3974,7 +4236,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
return SDValue();
- // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
bool LookPassAnd0 = false;
bool LookPassAnd1 = false;
if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
@@ -4593,20 +4855,6 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
return nullptr;
}
-// if Left + Right == Sum (constant or constant splat vector)
-static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum,
- SelectionDAG &DAG, const SDLoc &DL) {
- EVT ShiftVT = Left.getValueType();
- if (ShiftVT != Right.getValueType()) return false;
-
- SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT,
- Left.getNode(), Right.getNode());
- if (!ShiftSum) return false;
-
- ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum);
- return CSum && CSum->getZExtValue() == Sum;
-}
-
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
@@ -4620,6 +4868,16 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
if (!HasROTL && !HasROTR) return nullptr;
+ // Check for truncated rotate.
+ if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
+ assert(LHS.getValueType() == RHS.getValueType());
+ if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
+ SDValue(Rot, 0)).getNode();
+ }
+ }
+
// Match "(X shl/srl V1) & V2" where V2 may not be present.
SDValue LHSShift; // The shift.
SDValue LHSMask; // AND value if any.
@@ -4652,7 +4910,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
// fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
- if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) {
+ auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
+ };
+ if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
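
Illustration, not part of the diff: the identity MatchRotateSum now checks per constant lane, shown on scalars (assumes C++20 for std::rotl). When the two shift amounts sum to the element width, shl|srl is a rotate.

    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x80000001u;
      for (int c1 = 1; c1 < 32; ++c1) {
        int c2 = 32 - c1;                    // c1 + c2 == EltSizeInBits
        assert(((x << c1) | (x >> c2)) == std::rotl(x, c1));
      }
      return 0;
    }
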
@@ -4712,20 +4974,22 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
}
namespace {
+
/// Represents known origin of an individual byte in load combine pattern. The
/// value of the byte is either constant zero or comes from memory.
struct ByteProvider {
// For constant zero providers Load is set to nullptr. For memory providers
// Load represents the node which loads the byte from memory.
// ByteOffset is the offset of the byte in the value produced by the load.
- LoadSDNode *Load;
- unsigned ByteOffset;
+ LoadSDNode *Load = nullptr;
+ unsigned ByteOffset = 0;
- ByteProvider() : Load(nullptr), ByteOffset(0) {}
+ ByteProvider() = default;
static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
return ByteProvider(Load, ByteOffset);
}
+
static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
bool isConstantZero() const { return !Load; }
@@ -4740,6 +5004,8 @@ private:
: Load(Load), ByteOffset(ByteOffset) {}
};
+} // end anonymous namespace
+
/// Recursively traverses the expression calculating the origin of the requested
/// byte of the given value. Returns None if the provider can't be calculated.
///
@@ -4751,9 +5017,9 @@ private:
/// Because the parts of the expression are not allowed to have more than one
/// use this function iterates over trees, not DAGs. So it never visits the same
/// node more than once.
-const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
- unsigned Depth,
- bool Root = false) {
+static const Optional<ByteProvider>
+calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
+ bool Root = false) {
// Typical i64 by i8 pattern requires recursion up to 8 calls depth
if (Depth == 10)
return None;
@@ -4837,7 +5103,6 @@ const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index,
return None;
}
-} // namespace
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
@@ -4950,7 +5215,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
Loads.insert(L);
}
- assert(Loads.size() > 0 && "All the bytes of the value must be loaded from "
+ assert(!Loads.empty() && "All the bytes of the value must be loaded from "
"memory, so there must be at least one load which produces the value");
assert(Base && "Base address of the accessed memory location must be set");
assert(FirstOffset != INT64_MAX && "First byte offset must be set");
@@ -5373,7 +5638,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (isNullConstantOrNullSplatConstant(N0))
return N0;
// fold (shl x, c >= size(x)) -> undef
- if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+ // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+ return Val->getAPIntValue().uge(OpSizeInBits);
+ };
+ if (matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5400,20 +5669,29 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return SDValue(N, 0);
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- SDLoc DL(N);
- APInt c1 = N0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
+ if (N0.getOpcode() == ISD::SHL) {
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getConstant(0, SDLoc(N), VT);
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- return DAG.getConstant(0, DL, VT);
-
- return DAG.getNode(
- ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
}
}
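
Illustration, not part of the diff: the arithmetic of the two lambdas above, on scalars. In-range sums merge into one shift; an out-of-range sum means every bit has been shifted out, so the DAG folds to the constant 0 (not demonstrable in plain C++, where such a shift is undefined behavior).

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x9;
      assert(((x << 3) << 4) == (x << 7)); // MatchInRange: add the amounts
      // MatchOutOfRange (c1 + c2 >= 32) becomes getConstant(0, ...) above.
      return 0;
    }
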
@@ -5527,16 +5805,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
// fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
+ // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
- if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
+ N0.getNode()->hasOneUse() &&
isConstantOrConstantVector(N1, /* No Opaques */ true) &&
isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
AddToWorklist(Shl0.getNode());
AddToWorklist(Shl1.getNode());
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
+ return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
@@ -5579,7 +5859,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
// fold (sra x, c >= size(x)) -> undef
- if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+ // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+ return Val->getAPIntValue().uge(OpSizeInBits);
+ };
+ if (matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5603,20 +5887,31 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SRA) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- SDLoc DL(N);
- APInt c1 = N0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
-
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
+ if (N0.getOpcode() == ISD::SRA) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
- return DAG.getNode(
- ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT));
+
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum);
}
}
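
Illustration, not part of the diff: why the SRA out-of-range case clamps to width-1 rather than folding to 0. An arithmetic right shift saturates at the sign bit (signed shifts are guaranteed arithmetic from C++20; common behavior before that).

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t neg = -8, pos = 8;
      assert(((neg >> 20) >> 20) == (neg >> 31)); // 20+20 >= 32 clamps to 31
      assert(((pos >> 20) >> 20) == (pos >> 31)); // non-negative saturates at 0
      return 0;
    }
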
@@ -5647,7 +5942,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
TLI.isTruncateFree(VT, TruncVT)) {
-
SDLoc DL(N);
SDValue Amt = DAG.getConstant(ShiftAmt, DL,
getShiftAmountTy(N0.getOperand(0).getValueType()));
@@ -5697,7 +5991,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
-
// If the sign bit is known to be zero, switch this to a SRL.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
@@ -5730,7 +6023,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (isNullConstantOrNullSplatConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
- if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
+ // NOTE: ALL vector elements must be too big to avoid partial UNDEFs.
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) {
+ return Val->getAPIntValue().uge(OpSizeInBits);
+ };
+ if (matchUnaryPredicate(N1, MatchShiftTooBig))
return DAG.getUNDEF(VT);
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -5745,20 +6042,29 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getConstant(0, SDLoc(N), VT);
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
- if (N1C && N0.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- SDLoc DL(N);
- APInt c1 = N0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
+ if (N0.getOpcode() == ISD::SRL) {
+ auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).uge(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
+ return DAG.getConstant(0, SDLoc(N), VT);
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- return DAG.getConstant(0, DL, VT);
-
- return DAG.getNode(
- ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
+ auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return (c1 + c2).ult(OpSizeInBits);
+ };
+ if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
+ SDLoc DL(N);
+ EVT ShiftVT = N1.getValueType();
+ SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
}
}
@@ -6008,7 +6314,6 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
return SDValue();
}
-
/// \brief Generate Min/Max node
static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
SDValue RHS, SDValue True, SDValue False,
@@ -6096,7 +6401,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
// For any constants that differ by 1, we can transform the select into an
// extend and add. Use a target hook because some targets may prefer to
// transform in the other direction.
- if (TLI.convertSelectOfConstantsToMath()) {
+ if (TLI.convertSelectOfConstantsToMath(VT)) {
if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
// select Cond, C1, C1-1 --> add (zext Cond), C1-1
if (VT != MVT::i1)
@@ -6371,7 +6676,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
-
if (Level >= AfterLegalizeTypes)
return SDValue();
@@ -6432,7 +6736,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
-
if (Level >= AfterLegalizeTypes)
return SDValue();
@@ -6447,7 +6750,6 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// prevents the type legalizer from unrolling SETCC into scalar comparisons
// and enables future optimizations (e.g. min/max pattern matching on X86).
if (Mask.getOpcode() == ISD::SETCC) {
-
// Check if any splitting is required.
if (TLI.getTypeAction(*DAG.getContext(), VT) !=
TargetLowering::TypeSplitVector)
@@ -6504,11 +6806,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
-
if (Level >= AfterLegalizeTypes)
return SDValue();
- MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
+ MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
SDLoc DL(N);
@@ -6581,7 +6882,6 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
-
if (Level >= AfterLegalizeTypes)
return SDValue();
@@ -6593,7 +6893,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
// and enables future optimizations (e.g. min/max pattern matching on X86).
-
if (Mask.getOpcode() == ISD::SETCC) {
EVT VT = N->getValueType(0);
@@ -6665,6 +6964,57 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
return SDValue();
}
+/// A vector select of 2 constant vectors can be simplified to math/logic to
+/// avoid a variable select instruction and possibly avoid constant loads.
+SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
+ !TLI.convertSelectOfConstantsToMath(VT) ||
+ !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
+ !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
+ return SDValue();
+
+ // Check if we can use the condition value to increment/decrement a single
+ // constant value. This simplifies a select to an add and removes a constant
+ // load/materialization from the general case.
+ bool AllAddOne = true;
+ bool AllSubOne = true;
+ unsigned Elts = VT.getVectorNumElements();
+ for (unsigned i = 0; i != Elts; ++i) {
+ SDValue N1Elt = N1.getOperand(i);
+ SDValue N2Elt = N2.getOperand(i);
+ if (N1Elt.isUndef() || N2Elt.isUndef())
+ continue;
+
+ const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
+ const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
+ if (C1 != C2 + 1)
+ AllAddOne = false;
+ if (C1 != C2 - 1)
+ AllSubOne = false;
+ }
+
+ // Further simplifications for the extra-special cases where the constants are
+ // all 0 or all -1 should be implemented as folds of these patterns.
+ SDLoc DL(N);
+ if (AllAddOne || AllSubOne) {
+ // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
+ // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
+ auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
+ SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
+ return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
+ }
+
+ // The general case for select-of-constants:
+ // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
+ // ...but that only makes sense if a vselect is slower than 2 logic ops, so
+ // leave that to a machine-specific pass.
+ return SDValue();
+}
+
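
Illustration, not part of the diff: the per-lane arithmetic behind foldVSelectOfConstants, modeled with the i1 condition as an int. zext(i1) is 0/1 and sext(i1) is 0/-1, which turns the constant select into an add.

    #include <cassert>

    int main() {
      for (int cond = 0; cond <= 1; ++cond) {
        int C2 = 6;
        int zext = cond, sext = cond ? -1 : 0;
        assert((cond ? C2 + 1 : C2) == C2 + zext); // C1 == C2+1 -> add (zext Cond), C2
        assert((cond ? C2 - 1 : C2) == C2 + sext); // C1 == C2-1 -> add (sext Cond), C2
      }
      return 0;
    }
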
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -6729,6 +7079,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
return CV;
}
+ if (SDValue V = foldVSelectOfConstants(N))
+ return V;
+
return SDValue();
}
@@ -7243,8 +7596,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND);
+ bool NoReplaceTruncAnd = !N0.hasOneUse();
bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
CombineTo(N, And);
+ // If N0 has multiple uses, change other uses as well.
+ if (NoReplaceTruncAnd) {
+ SDValue TruncAnd =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+ CombineTo(N0.getNode(), TruncAnd);
+ }
if (NoReplaceTrunc)
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
else
@@ -7307,7 +7667,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
return SCC;
- if (!VT.isVector()) {
+ if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
EVT SetCCVT = getSetCCResultType(N00VT);
// Don't do this transform for i1 because there's a select transform
// that would reverse it.
@@ -7399,20 +7759,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
}
- // fold (zext (truncate (load x))) -> (zext (smaller load x))
- // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
- if (N0.getOpcode() == ISD::TRUNCATE) {
- if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode *oye = N0.getOperand(0).getNode();
- if (NarrowLoad.getNode() != N0.getNode()) {
- CombineTo(N0.getNode(), NarrowLoad);
- // CombineTo deleted the truncate, if needed, but not what's under it.
- AddToWorklist(oye);
- }
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
-
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
@@ -7445,7 +7791,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
AddToWorklist(Op.getNode());
- return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
+ // We may safely transfer the debug info describing the truncate node over
+ // to the equivalent and operation.
+ DAG.transferDbgValues(N0, And);
+ return And;
}
}
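
Illustration, not part of the diff: the scalar identity behind (zext (truncate x)) -> (and x, mask), the node onto which the truncate's debug values are now transferred.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xABCD1234u;
      assert((uint32_t)(uint8_t)x == (x & 0xFFu)); // trunc to i8, zext back
      return 0;
    }
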
@@ -7522,11 +7872,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (!N0.hasOneUse()) {
if (N0.getOpcode() == ISD::AND) {
auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
- auto NarrowLoad = false;
EVT LoadResultTy = AndC->getValueType(0);
- EVT ExtVT, LoadedVT;
- if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
- NarrowLoad))
+ EVT ExtVT;
+ if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT))
DoXform = false;
}
if (DoXform)
@@ -7547,8 +7895,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDLoc(N0.getOperand(0)),
N0.getOperand(0).getValueType(), ExtLoad);
ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND);
+ bool NoReplaceTruncAnd = !N0.hasOneUse();
bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
CombineTo(N, And);
+ // If N0 has multiple uses, change other uses as well.
+ if (NoReplaceTruncAnd) {
+ SDValue TruncAnd =
+ DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
+ CombineTo(N0.getNode(), TruncAnd);
+ }
if (NoReplaceTrunc)
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
else
@@ -7604,10 +7959,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// If the desired elements are smaller or larger than the source
// elements we can use a matching integer vector type and then
// truncate/sign extend.
- EVT MatchingElementType = EVT::getIntegerVT(
- *DAG.getContext(), N00VT.getScalarSizeInBits());
- EVT MatchingVectorType = EVT::getVectorVT(
- *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
N0.getOperand(1), N0.getOperand(2));
@@ -7731,7 +8083,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
ISD::ANY_EXTEND);
// If the load value is used only by N, replace it via CombineTo N.
bool NoReplaceTrunc = N0.hasOneUse();
- CombineTo(N, ExtLoad);
+ CombineTo(N, ExtLoad);
if (NoReplaceTrunc)
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
else
@@ -7769,13 +8121,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// aext(setcc) -> aext(vsetcc)
// Only do this before legalize for now.
if (VT.isVector() && !LegalOperations) {
- EVT N0VT = N0.getOperand(0).getValueType();
- // We know that the # elements of the results is the same as the
- // # elements of the compare (and the # elements of the compare result
- // for that matter). Check to see that they are the same size. If so,
- // we know that the element size of the sext'd result matches the
- // element size of the compare operands.
- if (VT.getSizeInBits() == N0VT.getSizeInBits())
+ EVT N00VT = N0.getOperand(0).getValueType();
+ if (getSetCCResultType(N00VT) == N0.getValueType())
+ return SDValue();
+
+    // We know that the # elements of the result is the same as the
+    // # elements of the compare (and the # elements of the compare result
+    // for that matter). Check to see that they are the same size. If so,
+    // we know that the element size of the extended result matches the
+    // element size of the compare operands.
+ if (VT.getSizeInBits() == N00VT.getSizeInBits())
return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get());
@@ -7783,7 +8138,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// elements we can use a matching integer vector type and then
// truncate/any extend
else {
- EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
+ EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
SDValue VsetCC =
DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
N0.getOperand(1),
@@ -7804,77 +8159,47 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitAssertZext(SDNode *N) {
+SDValue DAGCombiner::visitAssertExt(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
- EVT EVT = cast<VTSDNode>(N1)->getVT();
+ EVT AssertVT = cast<VTSDNode>(N1)->getVT();
- // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt)
- if (N0.getOpcode() == ISD::AssertZext &&
- EVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
+ // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
+ if (N0.getOpcode() == Opcode &&
+ AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
return N0;
- return SDValue();
-}
+ if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
+ N0.getOperand(0).getOpcode() == Opcode) {
+    // We have an assert, truncate, assert sandwich. Make one stronger assert
+    // by asserting the smallest asserted type on the larger source type.
+ // This eliminates the later assert:
+ // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
+ // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
+ SDValue BigA = N0.getOperand(0);
+ EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
+ assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
+ "Asserting zero/sign-extended bits to a type larger than the "
+ "truncated destination does not provide information");
-/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by Mask are used. If so, return the simpler operand,
-/// otherwise return a null SDValue.
-///
-/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can
-/// simplify nodes with multiple uses more aggressively.)
-SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
- switch (V.getOpcode()) {
- default: break;
- case ISD::Constant: {
- const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
- assert(CV && "Const value should be ConstSDNode.");
- const APInt &CVal = CV->getAPIntValue();
- APInt NewVal = CVal & Mask;
- if (NewVal != CVal)
- return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
- break;
+ SDLoc DL(N);
+ EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
+ SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
+ SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
+ BigA.getOperand(0), MinAssertVTVal);
+ return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
}
- case ISD::OR:
- case ISD::XOR:
- // If the LHS or RHS don't contribute bits to the or, drop them.
- if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
- return V.getOperand(1);
- if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
- return V.getOperand(0);
- break;
- case ISD::SRL:
- // Only look at single-use SRLs.
- if (!V.getNode()->hasOneUse())
- break;
- if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
- // See if we can recursively simplify the LHS.
- unsigned Amt = RHSC->getZExtValue();
- // Watch out for shift count overflow though.
- if (Amt >= Mask.getBitWidth()) break;
- APInt NewMask = Mask << Amt;
- if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
- return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
- SimplifyLHS, V.getOperand(1));
- }
- break;
- case ISD::AND: {
- // X & -1 -> X (ignoring bits which aren't demanded).
- ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
- if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
- return V.getOperand(0);
- break;
- }
- }
return SDValue();
}
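
Why keeping only the minimum asserted width is safe: the narrower zero-extension assertion implies the wider one. A hedged standalone check (plain C++, not patch code):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 4096; ++X) {
    bool HoldsI1 = (X >> 1) == 0; // AssertZext to i1 would be valid
    bool HoldsI8 = (X >> 8) == 0; // AssertZext to i8 would be valid
    // The narrower assertion implies the wider one, so the weaker,
    // later assert adds no information and can be dropped.
    assert(!HoldsI1 || HoldsI8);
  }
}
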
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
-/// bits of new type. If the result is to be extended, also fold the extension
-/// to form a extending load.
+/// bits of new type. Also narrow the load if the result is masked with an AND
+/// to effectively produce a smaller type. If the result is to be extended, also
+/// fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
unsigned Opc = N->getOpcode();
@@ -7893,28 +8218,40 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
} else if (Opc == ISD::SRL) {
- // Another special-case: SRL is basically zero-extending a narrower value.
+ // Another special-case: SRL is basically zero-extending a narrower value,
+    // or it may be shifting a higher subword, half, or byte into the lowest
+ // bits.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
- ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
- if (!N01) return SDValue();
- ExtVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() - N01->getZExtValue());
- }
- if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
- return SDValue();
- unsigned EVTBits = ExtVT.getSizeInBits();
+ auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
+ auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01 || !LN0)
+ return SDValue();
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (!ExtVT.isRound())
- return SDValue();
+ uint64_t ShiftAmt = N01->getZExtValue();
+ uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+ if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
+ else
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() - ShiftAmt);
+ } else if (Opc == ISD::AND) {
+ // An AND with a constant mask is the same as a truncate + zero-extend.
+ auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!AndC || !AndC->getAPIntValue().isMask())
+ return SDValue();
+
+ unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+ ExtType = ISD::ZEXTLOAD;
+ ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+ }
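
Both new cases rest on the same memory picture. A standalone sketch (plain C++, little-endian layout assumed, made-up buffer contents):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Buf[4] = {0x78, 0x56, 0x34, 0x12}; // i32 0x12345678
  uint32_t Wide;
  std::memcpy(&Wide, Buf, 4); // the original wide load
  // srl Wide, 16  ==  zextload i16 from Buf + 2
  uint16_t Hi16;
  std::memcpy(&Hi16, Buf + 2, 2);
  assert((Wide >> 16) == Hi16);
  // and Wide, 0xff  ==  zextload i8 from Buf + 0
  assert((Wide & 0xffu) == Buf[0]);
}
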
unsigned ShAmt = 0;
if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
+ unsigned EVTBits = ExtVT.getSizeInBits();
// Is the shift amount a multiple of size of VT?
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
@@ -7951,42 +8288,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
}
}
- // If we haven't found a load, we can't narrow it. Don't transform one with
- // multiple uses, this would require adding a new load.
- if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
+ // If we haven't found a load, we can't narrow it.
+ if (!isa<LoadSDNode>(N0))
return SDValue();
- // Don't change the width of a volatile load.
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (LN0->isVolatile())
- return SDValue();
-
- // Verify that we are actually reducing a load width here.
- if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
- return SDValue();
-
- // For the transform to be legal, the load must produce only two values
- // (the value loaded and the chain). Don't transform a pre-increment
- // load, for example, which produces an extra value. Otherwise the
- // transformation is not equivalent, and the downstream logic to replace
- // uses gets things wrong.
- if (LN0->getNumValues() > 2)
- return SDValue();
-
- // If the load that we're shrinking is an extload and we're not just
- // discarding the extension we can't simply shrink the load. Bail.
- // TODO: It would be possible to merge the extensions in some cases.
- if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
- LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
- return SDValue();
-
- if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
- return SDValue();
-
- EVT PtrType = N0.getOperand(1).getValueType();
-
- if (PtrType == MVT::Untyped || PtrType.isExtended())
- // It's not possible to generate a constant of extended or untyped type.
+ if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
// For big endian targets, we need to adjust the offset to the pointer to
@@ -7997,6 +8304,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
}
+ EVT PtrType = N0.getOperand(1).getValueType();
uint64_t PtrOff = ShAmt / 8;
unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
SDLoc DL(LN0);
@@ -8130,10 +8438,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
}
// fold (sext_inreg (extload x)) -> (sextload x)
+  // If sextload is not supported by the target, we can only do the combine
+  // when the load has one use. Doing otherwise can block folding the extload
+  // with other extends that the target does support.
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
+ N0.hasOneUse()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -8208,12 +8520,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// noop truncate
if (N0.getValueType() == N->getValueType(0))
return N0;
- // fold (truncate c1) -> c1
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+
// fold (truncate (truncate x)) -> (truncate x)
if (N0.getOpcode() == ISD::TRUNCATE)
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
+
+ // fold (truncate c1) -> c1
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
+ SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
+ if (C.getNode() != N)
+ return C;
+ }
+
// fold (truncate (ext x)) -> (ext x) or (truncate x) or x
if (N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
@@ -8245,7 +8563,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// we need to be more careful about the vector instructions that we generate.
if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
-
EVT VecTy = N0.getOperand(0).getValueType();
EVT ExTy = N0.getValueType();
EVT TrTy = N->getValueType(0);
@@ -8311,7 +8628,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
N0.getOperand(0).hasOneUse()) {
-
SDValue BuildVect = N0.getOperand(0);
EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
EVT TruncVecEltTy = VT.getVectorElementType();
@@ -8340,9 +8656,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Currently we only perform this optimization on scalars because vectors
// may have different active low bits.
if (!VT.isVector()) {
- if (SDValue Shorter =
- GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
- VT.getSizeInBits())))
+ APInt Mask =
+ APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
+ if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
}
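
The demanded-bits reasoning in standalone form (plain C++, illustrative values): bits above the truncated width cannot influence the result, so an operation that only touches them may be simplified away first.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x12345678u;
  // Only the low 16 bits are demanded by the truncate, so the OR of a
  // high-bits-only constant is droppable before truncating.
  assert(static_cast<uint16_t>(X | 0xABCD0000u) == static_cast<uint16_t>(X));
}
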
@@ -8413,7 +8729,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// Fold truncate of a bitcast of a vector to an extract of the low vector
// element.
//
- // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
+ // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
SDValue VecSrc = N0.getOperand(0);
EVT SrcVT = VecSrc.getValueType();
@@ -8423,8 +8739,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDLoc SL(N);
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
- VecSrc, DAG.getConstant(0, SL, IdxVT));
+ VecSrc, DAG.getConstant(Idx, SL, IdxVT));
}
}
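
The index is endian-dependent because the low half of the bitcast integer lives in element 0 only on little-endian targets. A standalone sketch, assuming a little-endian host:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t V[2] = {0xAAAAAAAAu, 0xBBBBBBBBu}; // v2i32
  uint64_t I;
  std::memcpy(&I, V, 8); // bitcast v2i32 -> i64
  // trunc I to i32 recovers element 0 here; a big-endian target would
  // need element NumElts - 1, which is what the Idx computation selects.
  assert(static_cast<uint32_t>(I) == V[0]);
}
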
@@ -8466,11 +8783,18 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
+
+  // A BUILD_PAIR always has the least significant part in elt 0 and the
+ // most significant part in elt 1. So when combining into one large load, we
+ // need to consider the endianness.
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(LD1, LD2);
+
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
LD1->getAddressSpace() != LD2->getAddressSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
- unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
+ unsigned LD1Bytes = LD1VT.getStoreSize();
if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
unsigned Align = LD1->getAlignment();
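
The layout that the swap accounts for, as a standalone sketch (plain C++, little-endian host assumed; on big-endian the two halves trade places in memory, hence the std::swap above):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Buf[4] = {0x01, 0x02, 0x03, 0x04};
  uint16_t Lo, Hi;
  std::memcpy(&Lo, Buf, 2);     // LD1: least significant half (elt 0)
  std::memcpy(&Hi, Buf + 2, 2); // LD2: most significant half (elt 1)
  uint32_t Pair = (static_cast<uint32_t>(Hi) << 16) | Lo; // BUILD_PAIR
  uint32_t Merged;
  std::memcpy(&Merged, Buf, 4); // the single combined load
  assert(Pair == Merged);
}
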
@@ -8751,12 +9075,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (Op.getOpcode() == ISD::BITCAST &&
Op.getOperand(0).getValueType() == VT)
return SDValue(Op.getOperand(0));
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
return DAG.getBitcast(VT, Op);
return SDValue();
};
+ // FIXME: If either input vector is bitcast, try to convert the shuffle to
+ // the result type of this bitcast. This would eliminate at least one
+ // bitcast. See the transform in InstCombine.
SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
if (!(SV0 && SV1))
@@ -8949,7 +9276,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- bool LookThroughFPExt = TLI.isFPExtFree(VT);
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
@@ -8979,28 +9305,31 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (LookThroughFPExt) {
- // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (isContractableFMUL(N00))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)), N1);
+
+ // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)), N1);
}
+ }
- // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
- // Note: Commutes FADD operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (isContractableFMUL(N10))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)), N0);
+ // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
+ // Note: Commutes FADD operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)), N0);
}
}
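
A standalone numeric sketch of this contraction (inputs chosen to be exactly representable so both forms agree; in general the fused form rounds once, which is why the new isFPExtFoldable hook must approve the extension):

#include <cassert>
#include <cmath>

int main() {
  float X = 1.5f, Y = 2.25f;
  double Z = 0.125;
  // fadd (fpext (fmul X, Y)), Z
  double Unfused = static_cast<double>(X * Y) + Z;
  // fma (fpext X), (fpext Y), Z
  double Fused = std::fma(static_cast<double>(X), static_cast<double>(Y), Z);
  assert(Unfused == Fused);
}
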
@@ -9036,80 +9365,87 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
N0));
}
- if (LookThroughFPExt) {
- // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
- // -> (fma x, y, (fma (fpext u), (fpext v), z))
- auto FoldFAddFMAFPExtFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
- };
- if (N0.getOpcode() == PreferredFusedOpcode) {
- SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
- SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020))
- return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
- N020.getOperand(0), N020.getOperand(1),
- N1);
+
+ // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y, (fma (fpext u), (fpext v), z))
+ auto FoldFAddFMAFPExtFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
+ N020.getOperand(0), N020.getOperand(1),
+ N1);
}
}
+ }
- // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
- // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- auto FoldFAddFPExtFMAFMul = [&] (
- SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
- Z));
- };
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
- SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002))
- return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
- N002.getOperand(0), N002.getOperand(1),
- N1);
+ // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ auto FoldFAddFPExtFMAFMul = [&] (
+ SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
+ Z));
+ };
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
+ N002.getOperand(0), N002.getOperand(1),
+ N1);
}
}
+ }
- // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
- // -> (fma y, z, (fma (fpext u), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode) {
- SDValue N12 = N1.getOperand(2);
- if (N12.getOpcode() == ISD::FP_EXTEND) {
- SDValue N120 = N12.getOperand(0);
- if (isContractableFMUL(N120))
- return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
- N120.getOperand(0), N120.getOperand(1),
- N0);
+ // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
+ // -> (fma y, z, (fma (fpext u), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode) {
+ SDValue N12 = N1.getOperand(2);
+ if (N12.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N12.getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
+ N120.getOperand(0), N120.getOperand(1),
+ N0);
}
}
+ }
- // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
- // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (N10.getOpcode() == PreferredFusedOpcode) {
- SDValue N102 = N10.getOperand(2);
- if (isContractableFMUL(N102))
- return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
- N102.getOperand(0), N102.getOperand(1),
- N0);
+ // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
+ // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (N10.getOpcode() == PreferredFusedOpcode) {
+ SDValue N102 = N10.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
+ N102.getOperand(0), N102.getOperand(1),
+ N0);
}
}
}
@@ -9151,7 +9487,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
// Always prefer FMAD to FMA for precision.
unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
- bool LookThroughFPExt = TLI.isFPExtFree(VT);
// Is the node an FMUL and contractable either due to global flags or
// SDNodeFlags.
@@ -9187,79 +9522,83 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
}
// Look through FP_EXTEND nodes to do more combining.
- if (LookThroughFPExt) {
- // fold (fsub (fpext (fmul x, y)), z)
- // -> (fma (fpext x), (fpext y), (fneg z))
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (isContractableFMUL(N00))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT, N1));
+
+ // fold (fsub (fpext (fmul x, y)), z)
+ // -> (fma (fpext x), (fpext y), (fneg z))
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (isContractableFMUL(N00) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT, N1));
}
+ }
- // fold (fsub x, (fpext (fmul y, z)))
- // -> (fma (fneg (fpext y)), (fpext z), x)
- // Note: Commutes FSUB operands.
- if (N1.getOpcode() == ISD::FP_EXTEND) {
- SDValue N10 = N1.getOperand(0);
- if (isContractableFMUL(N10))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
+ // fold (fsub x, (fpext (fmul y, z)))
+ // -> (fma (fneg (fpext y)), (fpext z), x)
+ // Note: Commutes FSUB operands.
+ if (N1.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N10 = N1.getOperand(0);
+ if (isContractableFMUL(N10) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(0))),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N10.getOperand(1)),
+ N0);
+ }
+ }
+
+  // fold (fsub (fpext (fneg (fmul x, y))), z)
+  // -> (fneg (fma (fpext x), (fpext y), z))
+  // Note: This could be removed with appropriate canonicalization of the
+  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
+  // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FNEG) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(0))),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N10.getOperand(1)),
- N0);
- }
-
- // fold (fsub (fpext (fneg (fmul, x, y))), z)
- // -> (fneg (fma (fpext x), (fpext y), z))
- // Note: This could be removed with appropriate canonicalization of the
- // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
- // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
- // from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FNEG) {
- SDValue N000 = N00.getOperand(0);
- if (isContractableFMUL(N000)) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1));
- }
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
}
}
+ }
- // fold (fsub (fneg (fpext (fmul, x, y))), z)
- // -> (fneg (fma (fpext x)), (fpext y), z)
- // Note: This could be removed with appropriate canonicalization of the
- // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
- // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
- // from implementing the canonicalization in visitFSUB.
- if (N0.getOpcode() == ISD::FNEG) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == ISD::FP_EXTEND) {
- SDValue N000 = N00.getOperand(0);
- if (isContractableFMUL(N000)) {
- return DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N000.getOperand(1)),
- N1));
- }
+  // fold (fsub (fneg (fpext (fmul x, y))), z)
+  // -> (fneg (fma (fpext x), (fpext y), z))
+  // Note: This could be removed with appropriate canonicalization of the
+  // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
+  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
+  // from implementing the canonicalization in visitFSUB.
+ if (N0.getOpcode() == ISD::FNEG) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N000 = N00.getOperand(0);
+ if (isContractableFMUL(N000) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
+ return DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N000.getOperand(1)),
+ N1));
}
}
-
}
// More folding opportunities when target permits.
@@ -9298,102 +9637,108 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
N21, N0));
}
- if (LookThroughFPExt) {
- // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
- // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
- if (N0.getOpcode() == PreferredFusedOpcode) {
- SDValue N02 = N0.getOperand(2);
- if (N02.getOpcode() == ISD::FP_EXTEND) {
- SDValue N020 = N02.getOperand(0);
- if (isContractableFMUL(N020))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- N0.getOperand(0), N0.getOperand(1),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N020.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
- }
- }
- // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
- // -> (fma (fpext x), (fpext y),
- // (fma (fpext u), (fpext v), (fneg z)))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N0.getOpcode() == ISD::FP_EXTEND) {
- SDValue N00 = N0.getOperand(0);
- if (N00.getOpcode() == PreferredFusedOpcode) {
- SDValue N002 = N00.getOperand(2);
- if (isContractableFMUL(N002))
- return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N00.getOperand(1)),
- DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(0)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N002.getOperand(1)),
- DAG.getNode(ISD::FNEG, SL, VT,
- N1)));
- }
- }
-
- // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
- // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
- if (N1.getOpcode() == PreferredFusedOpcode &&
- N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
- SDValue N120 = N1.getOperand(2).getOperand(0);
- if (isContractableFMUL(N120)) {
- SDValue N1200 = N120.getOperand(0);
- SDValue N1201 = N120.getOperand(1);
+ // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
+ // -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
+ if (N0.getOpcode() == PreferredFusedOpcode) {
+ SDValue N02 = N0.getOperand(2);
+ if (N02.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N020 = N02.getOperand(0);
+ if (isContractableFMUL(N020) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
- N1.getOperand(1),
+ N0.getOperand(0), N0.getOperand(1),
DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1200)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1201),
- N0));
+ N020.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N020.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
}
}
+ }
- // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
- // -> (fma (fneg (fpext y)), (fpext z),
- // (fma (fneg (fpext u)), (fpext v), x))
- // FIXME: This turns two single-precision and one double-precision
- // operation into two double-precision operations, which might not be
- // interesting for all targets, especially GPUs.
- if (N1.getOpcode() == ISD::FP_EXTEND &&
- N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
- SDValue N100 = N1.getOperand(0).getOperand(0);
- SDValue N101 = N1.getOperand(0).getOperand(1);
- SDValue N102 = N1.getOperand(0).getOperand(2);
- if (isContractableFMUL(N102)) {
- SDValue N1020 = N102.getOperand(0);
- SDValue N1021 = N102.getOperand(1);
+ // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
+ // -> (fma (fpext x), (fpext y),
+ // (fma (fpext u), (fpext v), (fneg z)))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N0.getOpcode() == ISD::FP_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == PreferredFusedOpcode) {
+ SDValue N002 = N00.getOperand(2);
+ if (isContractableFMUL(N002) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
return DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N100)),
- DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N00.getOperand(1)),
DAG.getNode(PreferredFusedOpcode, SL, VT,
- DAG.getNode(ISD::FNEG, SL, VT,
- DAG.getNode(ISD::FP_EXTEND, SL,
- VT, N1020)),
DAG.getNode(ISD::FP_EXTEND, SL, VT,
- N1021),
- N0));
+ N002.getOperand(0)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N002.getOperand(1)),
+ DAG.getNode(ISD::FNEG, SL, VT,
+ N1)));
}
}
}
+
+ // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
+ // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
+ if (N1.getOpcode() == PreferredFusedOpcode &&
+ N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
+ SDValue N120 = N1.getOperand(2).getOperand(0);
+ if (isContractableFMUL(N120) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
+ SDValue N1200 = N120.getOperand(0);
+ SDValue N1201 = N120.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
+ N1.getOperand(1),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1200)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1201),
+ N0));
+ }
+ }
+
+ // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
+ // -> (fma (fneg (fpext y)), (fpext z),
+ // (fma (fneg (fpext u)), (fpext v), x))
+ // FIXME: This turns two single-precision and one double-precision
+ // operation into two double-precision operations, which might not be
+ // interesting for all targets, especially GPUs.
+ if (N1.getOpcode() == ISD::FP_EXTEND &&
+ N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
+ SDValue CvtSrc = N1.getOperand(0);
+ SDValue N100 = CvtSrc.getOperand(0);
+ SDValue N101 = CvtSrc.getOperand(1);
+ SDValue N102 = CvtSrc.getOperand(2);
+ if (isContractableFMUL(N102) &&
+ TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
+ SDValue N1020 = N102.getOperand(0);
+ SDValue N1021 = N102.getOperand(1);
+ return DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N100)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
+ DAG.getNode(PreferredFusedOpcode, SL, VT,
+ DAG.getNode(ISD::FNEG, SL, VT,
+ DAG.getNode(ISD::FP_EXTEND, SL,
+ VT, N1020)),
+ DAG.getNode(ISD::FP_EXTEND, SL, VT,
+ N1021),
+ N0));
+ }
+ }
}
return SDValue();
@@ -9959,6 +10304,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// TODO: The FMA node should have flags that propagate to this node.
return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
+
+    // fma (fneg x), K, y -> fma x, -K, y
+ if (N0.getOpcode() == ISD::FNEG &&
+ (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
+ (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
+ return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
+ DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
+ }
}
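
The sign-moving identity this fold uses, checked standalone (plain C++, exactly representable values):

#include <cassert>
#include <cmath>

int main() {
  double X = 3.0, K = 2.5, Y = 1.0;
  // fma (fneg X), K, Y == fma X, -K, Y: the negation moves onto the
  // constant, which may then fold into an immediate or constant pool.
  assert(std::fma(-X, K, Y) == std::fma(X, -K, Y));
}
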
if (Options.UnsafeFPMath) {
@@ -10081,8 +10434,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
(!LegalOperations ||
// FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
// backend)... we should handle this gracefully after Legalize.
- // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
- TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
+ // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
+ TLI.isOperationLegal(ISD::ConstantFP, VT) ||
TLI.isFPImmLegal(Recip, VT)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(Recip, DL, VT), Flags);
@@ -10264,7 +10617,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -10282,7 +10635,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
!VT.isVector() &&
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
@@ -10296,7 +10649,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
if (N0.getOpcode() == ISD::ZERO_EXTEND &&
N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
@@ -10318,7 +10671,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -10333,10 +10686,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// The next optimizations are desirable only if SELECT_CC can be lowered.
if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
// fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
-
if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
(!LegalOperations ||
- TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
+ TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
SDLoc DL(N);
SDValue Ops[] =
{ N0.getOperand(0), N0.getOperand(1),
@@ -10557,6 +10909,19 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
+ // fold ftrunc (known rounded int x) -> x
+  // ftrunc is part of the fptosi/fptoui expansion on some targets, so it is
+  // likely to be generated when extracting an integer from a rounded
+  // floating-point value.
+ switch (N0.getOpcode()) {
+ default: break;
+ case ISD::FRINT:
+ case ISD::FTRUNC:
+ case ISD::FNEARBYINT:
+ case ISD::FFLOOR:
+ case ISD::FCEIL:
+ return N0;
+ }
+
return SDValue();
}
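
Every opcode in that switch already yields an integral value, so a following ftrunc is an identity. A standalone check (plain C++):

#include <cassert>
#include <cmath>

int main() {
  double V = -2.7;
  assert(std::trunc(std::floor(V)) == std::floor(V));
  assert(std::trunc(std::ceil(V))  == std::ceil(V));
  assert(std::trunc(std::rint(V))  == std::rint(V));
  assert(std::trunc(std::trunc(V)) == std::trunc(V));
}
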
@@ -11160,6 +11525,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Replace the uses of Ptr with uses of the updated base value.
DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
deleteAndRecombine(Ptr.getNode());
+ AddToWorklist(Result.getNode());
return true;
}
@@ -11445,6 +11811,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
}
namespace {
+
/// \brief Helper structure used to slice a load into smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
@@ -11462,21 +11829,19 @@ struct LoadedSlice {
struct Cost {
/// Are we optimizing for code size.
bool ForCodeSize;
+
/// Various cost.
- unsigned Loads;
- unsigned Truncates;
- unsigned CrossRegisterBanksCopies;
- unsigned ZExts;
- unsigned Shift;
+ unsigned Loads = 0;
+ unsigned Truncates = 0;
+ unsigned CrossRegisterBanksCopies = 0;
+ unsigned ZExts = 0;
+ unsigned Shift = 0;
- Cost(bool ForCodeSize = false)
- : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
- CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
+ Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
/// \brief Get the cost of one isolated slice.
Cost(const LoadedSlice &LS, bool ForCodeSize = false)
- : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
- CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
+ : ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
EVT LoadedType = LS.getLoadedType();
if (TruncType != LoadedType &&
@@ -11538,13 +11903,17 @@ struct LoadedSlice {
bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
};
+
// The last instruction that represents the slice. This should be a
// truncate instruction.
SDNode *Inst;
+
// The original load instruction.
LoadSDNode *Origin;
+
// The right shift amount in bits from the original load.
unsigned Shift;
+
// The DAG from which Origin came.
// This is used to get some contextual information about legal types, etc.
SelectionDAG *DAG;
@@ -11746,7 +12115,8 @@ struct LoadedSlice {
return true;
}
};
-}
+
+} // end anonymous namespace
/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
@@ -11804,7 +12174,6 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
// Set the beginning of the pair.
First = Second) {
-
Second = &LoadedSlices[CurrSlice];
// If First is NULL, it means we start a new pair.
@@ -11935,7 +12304,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
// will be across several bytes. We do not support that.
unsigned Width = User->getValueSizeInBits(0);
if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
- return 0;
+ return false;
// Build the slice for this chain of computations.
LoadedSlice LS(User, LD, Shift, &DAG);
@@ -12060,7 +12429,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
return Result;
}
-
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
@@ -12121,7 +12489,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
.getNode();
}
-
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
@@ -12325,7 +12692,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
// Walk all the users of the constant with which we're multiplying.
for (SDNode *Use : ConstNode->uses()) {
-
if (Use == MulNode) // This use is the one we're on right now. Skip it.
continue;
@@ -12376,6 +12742,12 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
return false;
}
+static SDValue peekThroughBitcast(SDValue V) {
+ while (V.getOpcode() == ISD::BITCAST)
+ V = V.getOperand(0);
+ return V;
+}
+
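
A standalone analogue of the new helper (mock node type, not LLVM's SDValue) showing the strip-nested-wrappers pattern:

#include <cassert>

struct Node { bool IsBitcast; Node *Op; };

Node *peekThroughBitcast(Node *V) {
  while (V->IsBitcast) // keep unwrapping until a non-bitcast is reached
    V = V->Op;
  return V;
}

int main() {
  Node Leaf{false, nullptr};
  Node Cast1{true, &Leaf};
  Node Cast2{true, &Cast1};
  assert(peekThroughBitcast(&Cast2) == &Leaf);
}
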
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
unsigned NumStores) {
SmallVector<SDValue, 8> Chains;
@@ -12403,56 +12775,93 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
if (NumStores < 2)
return false;
- int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
-
// The latest Node in the DAG.
SDLoc DL(StoreNodes[0].MemNode);
- SDValue StoredVal;
+ int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+ unsigned SizeInBits = NumStores * ElementSizeBits;
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+ EVT StoreTy;
if (UseVector) {
- bool IsVec = MemVT.isVector();
- unsigned Elts = NumStores;
- if (IsVec) {
- // When merging vector stores, get the total number of elements.
- Elts *= MemVT.getVectorNumElements();
- }
+ unsigned Elts = NumStores * NumMemElts;
// Get the type for the merged vector store.
- EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
- assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+ StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+ } else
+ StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+ SDValue StoredVal;
+ if (UseVector) {
if (IsConstantSrc) {
SmallVector<SDValue, 8> BuildVector;
- for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+ for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
SDValue Val = St->getValue();
- if (MemVT.getScalarType().isInteger())
- if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
- Val = DAG.getConstant(
- (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
- SDLoc(CFP), MemVT);
+ // If constant is of the wrong type, convert it now.
+ if (MemVT != Val.getValueType()) {
+ Val = peekThroughBitcast(Val);
+ // Deal with constants of wrong size.
+ if (ElementSizeBits != Val.getValueSizeInBits()) {
+ EVT IntMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ if (isa<ConstantFPSDNode>(Val)) {
+ // Not clear how to truncate FP values.
+ return false;
+ } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
+ Val = DAG.getConstant(C->getAPIntValue()
+ .zextOrTrunc(Val.getValueSizeInBits())
+ .zextOrTrunc(ElementSizeBits),
+ SDLoc(C), IntMemVT);
+ }
+          // Bitcast the correctly sized value to the expected memory type.
+ Val = DAG.getBitcast(MemVT, Val);
+ }
BuildVector.push_back(Val);
}
- StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
+ StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, StoreTy, BuildVector);
} else {
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < NumStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- SDValue Val = St->getValue();
- // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
- if (Val.getValueType() != MemVT)
- return false;
+ SDValue Val = peekThroughBitcast(St->getValue());
+        // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
+        // type MemVT. If the underlying value is not the correct
+        // type, but it is an extraction of an appropriate vector, we
+ // can recast Val to be of the correct type. This may require
+ // converting between EXTRACT_VECTOR_ELT and
+ // EXTRACT_SUBVECTOR.
+ if ((MemVT != Val.getValueType()) &&
+ (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
+ SDValue Vec = Val.getOperand(0);
+ EVT MemVTScalarTy = MemVT.getScalarType();
+ // We may need to add a bitcast here to get types to line up.
+ if (MemVTScalarTy != Vec.getValueType()) {
+ unsigned Elts = Vec.getValueType().getSizeInBits() /
+ MemVTScalarTy.getSizeInBits();
+ EVT NewVecTy =
+ EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts);
+ Vec = DAG.getBitcast(NewVecTy, Vec);
+ }
+ auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR
+ : ISD::EXTRACT_VECTOR_ELT;
+ Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1));
+ }
Ops.push_back(Val);
}
// Build the extracted vector elements back into a vector.
- StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
- DL, Ty, Ops); }
+ StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+ : ISD::BUILD_VECTOR,
+ DL, StoreTy, Ops);
+ }
} else {
// We should always use a vector store when merging extracted vector
// elements, so this path implies a store of constants.
assert(IsConstantSrc && "Merged vector elements should use vector store");
- unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
APInt StoreInt(SizeInBits, 0);
// Construct a single integer constant which is made of the smaller
@@ -12463,18 +12872,25 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
SDValue Val = St->getValue();
- StoreInt <<= ElementSizeBytes * 8;
+ StoreInt <<= ElementSizeBits;
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
- StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits);
+ StoreInt |= C->getAPIntValue()
+ .zextOrTrunc(ElementSizeBits)
+ .zextOrTrunc(SizeInBits);
} else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
- StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
+ StoreInt |= C->getValueAPF()
+ .bitcastToAPInt()
+ .zextOrTrunc(ElementSizeBits)
+ .zextOrTrunc(SizeInBits);
+        // If FP truncation is necessary, give up for now.
+ if (MemVT.getSizeInBits() != ElementSizeBits)
+ return false;
} else {
llvm_unreachable("Invalid constant element type");
}
}
// Create the new Load and Store operations.
- EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
}
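
How the merged constant is assembled, in standalone form (two i16 elements into one i32; a toy version that ignores the endian-dependent iteration order the real code uses):

#include <cassert>
#include <cstdint>

int main() {
  uint16_t Elts[2] = {0x1111, 0x2222}; // two consecutive i16 stores
  uint32_t StoreInt = 0;
  for (uint16_t E : Elts) {
    StoreInt <<= 16; // ElementSizeBits
    StoreInt |= E;   // elements processed earlier land in the high bits
  }
  assert(StoreInt == 0x11112222u);
}
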
@@ -12483,7 +12899,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
// make sure we use a trunc store if it's necessary to be legal.
SDValue NewStore;
- if (UseVector || !UseTrunc) {
+ if (!UseTrunc) {
NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
FirstInChain->getAlignment());
@@ -12517,6 +12933,7 @@ void DAGCombiner::getStoreMergeCandidates(
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
EVT MemVT = St->getMemoryVT();
+ SDValue Val = peekThroughBitcast(St->getValue());
// We must have a base and an offset.
if (!BasePtr.getBase().getNode())
return;
@@ -12525,47 +12942,62 @@ void DAGCombiner::getStoreMergeCandidates(
if (BasePtr.getBase().isUndef())
return;
- bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
- isa<ConstantFPSDNode>(St->getValue());
- bool IsExtractVecSrc =
- (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
- bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+ bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
+ bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+ Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ bool IsLoadSrc = isa<LoadSDNode>(Val);
BaseIndexOffset LBasePtr;
// Match on loadbaseptr if relevant.
- if (IsLoadSrc)
- LBasePtr = BaseIndexOffset::match(
- cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
-
+ EVT LoadVT;
+ if (IsLoadSrc) {
+ auto *Ld = cast<LoadSDNode>(Val);
+ LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+ LoadVT = Ld->getMemoryVT();
+    // The load and store must have the same type.
+ if (MemVT != LoadVT)
+ return;
+ }
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
if (Other->isVolatile() || Other->isIndexed())
return false;
- // We can merge constant floats to equivalent integers
- if (Other->getMemoryVT() != MemVT)
- if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
- isa<ConstantFPSDNode>(Other->getValue())))
- return false;
+ SDValue Val = peekThroughBitcast(Other->getValue());
+ // Allow merging constants of different types as integers.
+ bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
+ : Other->getMemoryVT() != MemVT;
if (IsLoadSrc) {
+ if (NoTypeMatch)
+ return false;
// The Load's Base Ptr must also match
- if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
+ if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+ if (LoadVT != OtherLd->getMemoryVT())
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
return false;
}
- if (IsConstantSrc)
- if (!(isa<ConstantSDNode>(Other->getValue()) ||
- isa<ConstantFPSDNode>(Other->getValue())))
+ if (IsConstantSrc) {
+ if (NoTypeMatch)
return false;
- if (IsExtractVecSrc)
- if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
- Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
+ if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+ return false;
+ }
+ if (IsExtractVecSrc) {
+ // Do not merge truncated stores here.
+ if (Other->isTruncatingStore())
return false;
+ if (!MemVT.bitsEq(Val.getValueType()))
+ return false;
+ if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ return false;
+ }
Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
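
The base-plus-offset matching behind CandidateMatch, as a standalone sketch (mock types, not LLVM's BaseIndexOffset): two stores are candidates only if they decompose to the same base with a constant offset between them.

#include <cassert>

struct BaseOff { const void *Base; long long Off; };

bool equalBaseIndex(const BaseOff &A, const BaseOff &B, long long &Delta) {
  if (A.Base != B.Base) // different objects can never be consecutive
    return false;
  Delta = B.Off - A.Off;
  return true;
}

int main() {
  int Mem[4] = {};
  BaseOff St0{Mem, 0}, St1{Mem, 4}; // two i32 stores, 4 bytes apart
  long long Delta = 0;
  assert(equalBaseIndex(St0, St1, Delta) && Delta == 4);
}
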
+
// We are looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
// through all children. For instance we will find Store{1,2,3} if
@@ -12612,10 +13044,8 @@ void DAGCombiner::getStoreMergeCandidates(
// indirectly through its operand (we already consider dependencies
// through the chain). Check in parallel by searching up from
// non-chain operands of candidates.
-
bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) {
-
// FIXME: We should be able to truncate a full search of
// predecessors by doing a BFS and keeping tabs on the originating
// stores from which worklist nodes come, in a similar way to
@@ -12648,12 +13078,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
return false;
EVT MemVT = St->getMemoryVT();
- int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
+ int64_t ElementSizeBytes = MemVT.getStoreSize();
+ unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
return false;
- bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
+ bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
Attribute::NoImplicitFloat);
// This function cannot currently deal with non-byte-sized memory sizes.
@@ -12665,7 +13096,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// Perform an early exit check. Do not bother looking at stored values that
// are not constants, loads, or extracted vector elements.
- SDValue StoredVal = St->getValue();
+ SDValue StoredVal = peekThroughBitcast(St->getValue());
bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
isa<ConstantFPSDNode>(StoredVal);
@@ -12675,12 +13106,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
return false;
- // Don't merge vectors into wider vectors if the source data comes from loads.
- // TODO: This restriction can be lifted by using logic similar to the
- // ExtractVecSrc case.
- if (MemVT.isVector() && IsLoadSrc)
- return false;
-
SmallVector<MemOpLink, 8> StoreNodes;
// Find potential store merge candidates by searching through chain sub-DAG
getStoreMergeCandidates(St, StoreNodes);
@@ -12759,19 +13184,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned LastLegalVectorType = 1;
bool LastIntegerTrunc = false;
bool NonZero = false;
+ unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
SDValue StoredVal = ST->getValue();
-
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
- NonZero |= !C->isNullValue();
- } else if (ConstantFPSDNode *C =
- dyn_cast<ConstantFPSDNode>(StoredVal)) {
- NonZero |= !C->getConstantFPValue()->isNullValue();
- } else {
- // Non-constant.
- break;
+ bool IsElementZero = false;
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
+ IsElementZero = C->isNullValue();
+ else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
+ IsElementZero = C->getConstantFPValue()->isNullValue();
+ if (IsElementZero) {
+ if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
+ FirstZeroAfterNonZero = i;
}
+ NonZero |= !IsElementZero;
// Find a legal type for the constant store.
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
@@ -12791,8 +13217,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastLegalType = i + 1;
@@ -12806,13 +13232,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
!NoVectors) {
// Find a legal type for the vector store.
- unsigned Elts = i + 1;
- if (MemVT.isVector()) {
- // When merging vector stores, get the total number of elements.
- Elts *= MemVT.getVectorNumElements();
- }
+ unsigned Elts = (i + 1) * NumMemElts;
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
- if (TLI.isTypeLegal(Ty) &&
+ if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
FirstStoreAlign, &IsFast) &&
@@ -12821,23 +13243,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
}
}
+ bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
+ unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
+
// Check if we found a legal integer type that creates a meaningful merge.
- if (LastLegalType < 2 && LastLegalVectorType < 2) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ if (NumElem < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment has
+ // improved or we've dropped a non-zero value. Drop as many
+ // candidates as we can here.
+ unsigned NumSkip = 1;
+ while (
+ (NumSkip < NumConsecutiveStores) &&
+ (NumSkip < FirstZeroAfterNonZero) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) {
+ NumSkip++;
+ }
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
- bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
- unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
-
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc);
- if (!Merged) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- continue;
- }
+ RV |= Merged;
+
// Remove merged stores for next iteration.
- RV = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
continue;
}
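// A hedged sketch of the skip heuristic above, again plain C++ with
// invented alignments: candidates are dropped only while their alignment
// does not exceed the first store's and the zero/non-zero boundary
// (FirstZeroAfterNonZero) is not crossed.
#include <cstdio>
#include <vector>

int main() {
  std::vector<unsigned> Align = {4, 4, 8, 4};
  unsigned NumConsecutiveStores = (unsigned)Align.size();
  unsigned FirstStoreAlign = Align[0];
  unsigned FirstZeroAfterNonZero = 3; // as computed by the sketch above
  unsigned NumSkip = 1;
  while (NumSkip < NumConsecutiveStores && NumSkip < FirstZeroAfterNonZero &&
         Align[NumSkip] <= FirstStoreAlign)
    ++NumSkip;
  std::printf("drop the first %u candidates\n", NumSkip); // 2
}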
@@ -12849,25 +13282,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
unsigned NumStoresToMerge = 1;
- bool IsVec = MemVT.isVector();
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- unsigned StoreValOpcode = St->getValue().getOpcode();
+ SDValue StVal = peekThroughBitcast(St->getValue());
// This restriction could be loosened.
// Bail out if any stored values are not elements extracted from a
// vector. It should be possible to handle mixed sources, but load
// sources need more careful handling (see the block of code below that
// handles consecutive loads).
- if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
- StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
+ if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return RV;
// Find a legal type for the vector store.
- unsigned Elts = i + 1;
- if (IsVec) {
- // When merging vector stores, get the total number of elements.
- Elts *= MemVT.getVectorNumElements();
- }
+ unsigned Elts = (i + 1) * NumMemElts;
EVT Ty =
EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
bool IsFast;
@@ -12879,6 +13307,23 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NumStoresToMerge = i + 1;
}
+ // Check if we found a legal integer type that creates a meaningful merge.
+ if (NumStoresToMerge < 2) {
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment has
+ // improved. Drop as many candidates as we can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < NumConsecutiveStores) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
+ continue;
+ }
+
bool Merged = MergeStoresOfConstantsOrVecElts(
StoreNodes, MemVT, NumStoresToMerge, false, true, false);
if (!Merged) {
@@ -12905,7 +13350,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
BaseIndexOffset LdBasePtr;
for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
- LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
+ SDValue Val = peekThroughBitcast(St->getValue());
+ LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val);
if (!Ld)
break;
@@ -12917,10 +13363,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (Ld->isVolatile() || Ld->isIndexed())
break;
- // We do not accept ext loads.
- if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
- break;
-
// The stored memory type must be the same.
if (Ld->getMemoryVT() != MemVT)
break;
@@ -12986,7 +13428,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
isDereferenceable = false;
// Find a legal type for the vector store.
- EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1);
+ unsigned Elts = (i + 1) * NumMemElts;
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
@@ -13023,8 +13467,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
- FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
+ FirstStoreAlign, &IsFastSt) &&
IsFastSt &&
TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
FirstLoadAlign, &IsFastLd) &&
@@ -13047,7 +13491,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
NumElem = std::min(LastLegalType, NumElem);
if (NumElem < 2) {
- StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1);
+ // We know that candidate stores are in order and of correct
+ // shape. While there is no mergeable sequence from the
+ // beginning, one may start later in the sequence. The only
+ // reason a merge of size N could have failed where another of
+ // the same size would not have is if the alignment of either
+ // the load or store has improved. Drop as many candidates as we
+ // can here.
+ unsigned NumSkip = 1;
+ while ((NumSkip < LoadNodes.size()) &&
+ (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
+ (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
+ NumSkip++;
+ StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
continue;
}
@@ -13055,7 +13511,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
// to memory.
EVT JointMemOpVT;
if (UseVectorTy) {
- JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
+ // Find a legal type for the vector store.
+ unsigned Elts = NumElem * NumMemElts;
+ JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
} else {
unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
@@ -13104,12 +13562,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue(NewLoad.getNode(), 1));
}
- // Replace the all stores with the new store.
- for (unsigned i = 0; i < NumElem; ++i)
+ // Replace all of the stores with the new store. Recursively remove the
+ // corresponding value if it is no longer used.
+ for (unsigned i = 0; i < NumElem; ++i) {
+ SDValue Val = StoreNodes[i].MemNode->getOperand(1);
CombineTo(StoreNodes[i].MemNode, NewStore);
+ if (Val.getNode()->use_empty())
+ recursivelyDeleteUnusedNodes(Val.getNode());
+ }
+
RV = true;
StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
- continue;
}
return RV;
}
@@ -13284,7 +13747,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter = GetDemandedBits(
+ SDValue Shorter = DAG.GetDemandedBits(
Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
ST->getMemoryVT().getScalarSizeInBits()));
AddToWorklist(Value.getNode());
@@ -13356,11 +13819,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Ptr, ST->getMemoryVT(), ST->getMemOperand());
}
- // Only perform this optimization before the types are legal, because we
- // don't want to perform this optimization on every DAGCombine invocation.
- if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG
- : !LegalTypes) {
- for (;;) {
+ // Always perform this optimization before types are legal. If the target
+ // prefers, also try this after legalization to catch stores that were created
+ // by intrinsics or other nodes.
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+ while (true) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
@@ -13499,6 +13962,60 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
return St1;
}
+/// Convert a disguised subvector insertion into a shuffle:
+/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
+/// bitcast(shuffle (bitcast V), (extended X), Mask)
+/// Note: We do not use an insert_subvector node because that requires a legal
+/// subvector type.
+SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
+ SDValue InsertVal = N->getOperand(1);
+ if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
+ !InsertVal.getOperand(0).getValueType().isVector())
+ return SDValue();
+
+ SDValue SubVec = InsertVal.getOperand(0);
+ SDValue DestVec = N->getOperand(0);
+ EVT SubVecVT = SubVec.getValueType();
+ EVT VT = DestVec.getValueType();
+ unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+ unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
+ unsigned NumMaskVals = ExtendRatio * NumSrcElts;
+
+ // Step 1: Create a shuffle mask that implements this insert operation. The
+ // vector that we are inserting into will be operand 0 of the shuffle, so
+ // those elements are just 'i'. The inserted subvector is in the first
+ // positions of operand 1 of the shuffle. Example:
+ // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
+ SmallVector<int, 16> Mask(NumMaskVals);
+ for (unsigned i = 0; i != NumMaskVals; ++i) {
+ if (i / NumSrcElts == InsIndex)
+ Mask[i] = (i % NumSrcElts) + NumMaskVals;
+ else
+ Mask[i] = i;
+ }
+
+ // Bail out if the target cannot handle the shuffle we want to create.
+ EVT SubVecEltVT = SubVecVT.getVectorElementType();
+ EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
+ if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
+ return SDValue();
+
+ // Step 2: Create a wide vector from the inserted source vector by appending
+ // undefined elements. This is the same size as our destination vector.
+ SDLoc DL(N);
+ SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
+ ConcatOps[0] = SubVec;
+ SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
+
+ // Step 3: Shuffle in the padded subvector.
+ SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
+ SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
+ AddToWorklist(PaddedSubV.getNode());
+ AddToWorklist(DestVecBC.getNode());
+ AddToWorklist(Shuf.getNode());
+ return DAG.getBitcast(VT, Shuf);
+}
+
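// A hedged re-derivation of the mask construction above for the example in
// the comment (insert v4i32 V, (v2i16 X), 2); plain C++ with the type
// sizes written out as assumed constants:
#include <cstdio>
#include <vector>

int main() {
  unsigned NumSrcElts = 2;         // elements of the bitcast source, v2i16
  unsigned ExtendRatio = 128 / 32; // VT bits (v4i32) / SubVecVT bits (v2i16)
  unsigned NumMaskVals = ExtendRatio * NumSrcElts; // 8 shuffle lanes
  unsigned InsIndex = 2;           // insertion index into VT

  std::vector<int> Mask(NumMaskVals);
  for (unsigned i = 0; i != NumMaskVals; ++i)
    Mask[i] = (i / NumSrcElts == InsIndex)
                  ? (int)(i % NumSrcElts + NumMaskVals) // take from X'
                  : (int)i;                             // keep V' lane
  for (int M : Mask)
    std::printf("%d ", M); // 0 1 2 3 8 9 6 7
  std::printf("\n");
}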
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
@@ -13511,10 +14028,20 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT VT = InVec.getValueType();
- // Check that we know which element is being inserted
- if (!isa<ConstantSDNode>(EltNo))
+ // Remove redundant insertions:
+ // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
+ if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
+ return InVec;
+
+ // We must know which element is being inserted for folds below here.
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+ if (!IndexC)
return SDValue();
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ unsigned Elt = IndexC->getZExtValue();
+
+ if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
+ return Shuf;
// Canonicalize insert_vector_elt dag nodes.
// Example:
@@ -13692,9 +14219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// converts.
}
- // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
+ // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x)
+ bool isLE = DAG.getDataLayout().isLittleEndian();
+ unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1;
if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
- ConstEltNo->isNullValue() && VT.isInteger()) {
+ ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) {
SDValue BCSrc = InVec.getOperand(0);
if (BCSrc.getValueType().isScalarInteger())
return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
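// A hedged host-side illustration of the endianness fix above (assumes a
// little-endian host): the scalar produced by truncating an i64 occupies
// lane 0 of the bitcast v2i32 on little-endian, and would be the last lane
// on big-endian.
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t X = 0x1122334455667788ULL;
  uint32_t Lanes[2];
  std::memcpy(Lanes, &X, sizeof(X)); // model of bitcast i64 -> v2i32
  std::printf("lane0 = %08x, trunc = %08x\n", (unsigned)Lanes[0],
              (unsigned)(uint32_t)X);
  // On a little-endian host both print 55667788, mirroring EltTrunc == 0.
}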
@@ -13748,7 +14277,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// FIXME: We should handle recursing on other vector shuffles and
// scalar_to_vector here as well.
- if (!LegalOperations) {
+ if (!LegalOperations ||
+ // FIXME: Should really be just isOperationLegalOrCustom.
+ TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) ||
+ TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) {
EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
@@ -14054,10 +14586,18 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
EVT InVT1 = VecIn1.getValueType();
EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
- unsigned Vec2Offset = InVT1.getVectorNumElements();
+ unsigned Vec2Offset = 0;
unsigned NumElems = VT.getVectorNumElements();
unsigned ShuffleNumElems = NumElems;
+ // If both input vectors are extracted from the same base
+ // vector, we do not need the extra addend (Vec2Offset) when
+ // computing the shuffle mask.
+ if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
+ !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
+ !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
+ Vec2Offset = InVT1.getVectorNumElements();
+
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
if (InVT1 != VT || InVT2 != VT) {
@@ -14072,7 +14612,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
VecIn2 = SDValue();
} else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
- if (!TLI.isExtractSubvectorCheap(VT, NumElems))
+ if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
return SDValue();
if (!VecIn2.getNode()) {
@@ -14204,7 +14744,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isa<ConstantSDNode>(Op.getOperand(1)))
return SDValue();
-
SDValue ExtractedFromVec = Op.getOperand(0);
// All inputs must have the same element type as the output.
@@ -14227,6 +14766,50 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
if (VecIn.size() < 2)
return SDValue();
+ // If all the operands of the BUILD_VECTOR extract from the same
+ // vector, then split the vector efficiently based on the maximum
+ // vector access index and adjust the VectorMask and
+ // VecIn accordingly.
+ if (VecIn.size() == 2) {
+ unsigned MaxIndex = 0;
+ unsigned NearestPow2 = 0;
+ SDValue Vec = VecIn.back();
+ EVT InVT = Vec.getValueType();
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SmallVector<unsigned, 8> IndexVec(NumElems, 0);
+
+ for (unsigned i = 0; i < NumElems; i++) {
+ if (VectorMask[i] <= 0)
+ continue;
+ unsigned Index = N->getOperand(i).getConstantOperandVal(1);
+ IndexVec[i] = Index;
+ MaxIndex = std::max(MaxIndex, Index);
+ }
+
+ NearestPow2 = PowerOf2Ceil(MaxIndex);
+ if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
+ NumElems * 2 < NearestPow2) {
+ unsigned SplitSize = NearestPow2 / 2;
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), SplitSize);
+ if (TLI.isTypeLegal(SplitVT)) {
+ SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+ DAG.getConstant(SplitSize, DL, IdxTy));
+ SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
+ DAG.getConstant(0, DL, IdxTy));
+ VecIn.pop_back();
+ VecIn.push_back(VecIn1);
+ VecIn.push_back(VecIn2);
+
+ for (unsigned i = 0; i < NumElems; i++) {
+ if (VectorMask[i] <= 0)
+ continue;
+ VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
+ }
+ }
+ }
+ }
+
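// A hedged sketch of the split-point arithmetic above with invented
// numbers (an 8-element BUILD_VECTOR extracting up to index 100 of a wide
// source); PowerOf2Ceil is re-derived here rather than taken from
// llvm/Support/MathExtras.h:
#include <cstdio>

static unsigned PowerOf2Ceil(unsigned X) { // assumes X >= 1
  unsigned P = 1;
  while (P < X)
    P <<= 1;
  return P;
}

int main() {
  unsigned NumElems = 8, MaxIndex = 100;
  unsigned NearestPow2 = PowerOf2Ceil(MaxIndex); // 128
  if (NearestPow2 > 2 && MaxIndex < NearestPow2 && NumElems * 2 < NearestPow2) {
    unsigned SplitSize = NearestPow2 / 2; // 64
    std::printf("split the source vector at element %u\n", SplitSize);
  }
}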
// TODO: We want to sort the vectors by descending length, so that adjacent
// pairs have similar length, and the longer vector is always first in the
// pair.
@@ -14315,77 +14898,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
}
}
-
return Shuffles[0];
}
-// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
-// operations which can be matched to a truncate.
-SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) {
- // TODO: Add support for big-endian.
- if (DAG.getDataLayout().isBigEndian())
- return SDValue();
- if (N->getNumOperands() < 2)
- return SDValue();
- SDLoc DL(N);
- EVT VT = N->getValueType(0);
- unsigned NumElems = N->getNumOperands();
-
- if (!isTypeLegal(VT))
- return SDValue();
-
- // If the input is something other than an EXTRACT_VECTOR_ELT with a constant
- // index, bail out.
- // TODO: Allow undef elements in some cases?
- if (any_of(N->ops(), [VT](SDValue Op) {
- return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(Op.getOperand(1)) ||
- Op.getValueType() != VT.getVectorElementType();
- }))
- return SDValue();
-
- // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index
- auto GetExtractIdx = [](SDValue Extract) {
- return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue();
- };
-
- // The first BUILD_VECTOR operand must be an an extract from index zero
- // (assuming no undef and little-endian).
- if (GetExtractIdx(N->getOperand(0)) != 0)
- return SDValue();
-
- // Compute the stride from the first index.
- int Stride = GetExtractIdx(N->getOperand(1));
- SDValue ExtractedFromVec = N->getOperand(0).getOperand(0);
-
- // Proceed only if the stride and the types can be matched to a truncate.
- if ((Stride == 1 || !isPowerOf2_32(Stride)) ||
- (ExtractedFromVec.getValueType().getVectorNumElements() !=
- Stride * NumElems) ||
- (VT.getScalarSizeInBits() * Stride > 64))
- return SDValue();
-
- // Check remaining operands are consistent with the computed stride.
- for (unsigned i = 1; i != NumElems; ++i) {
- SDValue Op = N->getOperand(i);
-
- if ((Op.getOperand(0) != ExtractedFromVec) ||
- (GetExtractIdx(Op) != Stride * i))
- return SDValue();
- }
-
- // All checks were ok, construct the truncate.
- LLVMContext &Ctx = *DAG.getContext();
- EVT NewVT = VT.getVectorVT(
- Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems);
- EVT TruncVT =
- VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT;
-
- SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec);
- Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res);
- return DAG.getBitcast(VT, Res);
-}
-
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
@@ -14428,10 +14943,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
return V;
- if (TLI.isDesirableToCombineBuildVectorToTruncate())
- if (SDValue V = reduceBuildVecToTrunc(N))
- return V;
-
if (SDValue V = reduceBuildVecToShuffle(N))
return V;
@@ -14514,8 +15025,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
for (SDValue Op : N->ops()) {
// Peek through any bitcast.
- while (Op.getOpcode() == ISD::BITCAST)
- Op = Op.getOperand(0);
+ Op = peekThroughBitcast(Op);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (Op.isUndef()) {
@@ -14534,8 +15044,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
EVT ExtVT = ExtVec.getValueType();
// Peek through any bitcast.
- while (ExtVec.getOpcode() == ISD::BITCAST)
- ExtVec = ExtVec.getOperand(0);
+ ExtVec = peekThroughBitcast(ExtVec);
// UNDEF nodes convert to UNDEF shuffle mask values.
if (ExtVec.isUndef()) {
@@ -14760,9 +15269,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
- SDValue BinOp = Extract->getOperand(0);
- if (BinOp.getOpcode() == ISD::BITCAST)
- BinOp = BinOp.getOperand(0);
+ SDValue BinOp = peekThroughBitcast(Extract->getOperand(0));
// TODO: The motivating case for this transform is an x86 AVX1 target. That
// target has temptingly almost legal versions of bitwise logic ops in 256-bit
@@ -14786,13 +15293,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
return SDValue();
// Peek through bitcasts of the binary operator operands if needed.
- SDValue LHS = BinOp.getOperand(0);
- if (LHS.getOpcode() == ISD::BITCAST)
- LHS = LHS.getOperand(0);
-
- SDValue RHS = BinOp.getOperand(1);
- if (RHS.getOpcode() == ISD::BITCAST)
- RHS = RHS.getOperand(0);
+ SDValue LHS = peekThroughBitcast(BinOp.getOperand(0));
+ SDValue RHS = peekThroughBitcast(BinOp.getOperand(1));
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
@@ -14891,8 +15393,34 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
}
// Skip bitcasting
- if (V->getOpcode() == ISD::BITCAST)
- V = V.getOperand(0);
+ V = peekThroughBitcast(V);
+
+ // If the input is a build vector, try to make a smaller build vector.
+ if (V->getOpcode() == ISD::BUILD_VECTOR) {
+ if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ EVT InVT = V->getValueType(0);
+ unsigned ExtractSize = NVT.getSizeInBits();
+ unsigned EltSize = InVT.getScalarSizeInBits();
+ // Only do this if we won't split any elements.
+ if (ExtractSize % EltSize == 0) {
+ unsigned NumElems = ExtractSize / EltSize;
+ EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(),
+ InVT.getVectorElementType(), NumElems);
+ if ((!LegalOperations ||
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) &&
+ (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
+ unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
+ EltSize;
+
+ // Extract the pieces from the original build_vector.
+ SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
+ makeArrayRef(V->op_begin() + IdxVal,
+ NumElems));
+ return DAG.getBitcast(NVT, BuildVec);
+ }
+ }
+ }
+ }
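// A hedged model of the index arithmetic above, with assumed types:
// extracting a v2i32 (64 bits) at subvector index 1 from a BUILD_VECTOR of
// i16 elements takes 4 operands starting at operand 2.
#include <cstdio>

int main() {
  unsigned ExtractSizeBits = 64; // NVT size (v2i32)
  unsigned EltSizeBits = 16;     // BUILD_VECTOR element size (i16)
  unsigned NVTScalarBits = 32;   // NVT scalar size (i32)
  unsigned Idx = 1;              // constant extract index
  if (ExtractSizeBits % EltSizeBits == 0) {
    unsigned NumElems = ExtractSizeBits / EltSizeBits;   // 4
    unsigned IdxVal = Idx * NVTScalarBits / EltSizeBits; // 2
    std::printf("take %u operands starting at operand %u\n", NumElems, IdxVal);
  }
}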
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
// Handle only simple case where vector being inserted and vector
@@ -15013,6 +15541,37 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
}
+static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0,
+ SDValue N1, SelectionDAG &DAG) {
+ auto isUndefElt = [](SDValue V, int Idx) {
+ // TODO - handle more cases as required.
+ if (V.getOpcode() == ISD::BUILD_VECTOR)
+ return V.getOperand(Idx).isUndef();
+ if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
+ return (Idx != 0) || V.getOperand(0).isUndef();
+ return false;
+ };
+
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+
+ bool Changed = false;
+ SmallVector<int, 8> NewMask;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int Idx = SVN->getMaskElt(i);
+ if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) ||
+ ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) {
+ Changed = true;
+ Idx = -1;
+ }
+ NewMask.push_back(Idx);
+ }
+ if (Changed)
+ return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask);
+
+ return SDValue();
+}
+
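// A hedged model of simplifyShuffleMask in plain C++: per-element undef
// flags stand in for BUILD_VECTOR operands, and lanes that would read an
// undef element are rewritten to -1. The values are assumed, not taken
// from the patch.
#include <cstdio>
#include <vector>

int main() {
  std::vector<bool> N0Undef = {false, true, false, false};
  std::vector<bool> N1Undef = {false, false, true, false};
  std::vector<int> Mask = {0, 1, 6, 7}; // [0,4) -> N0, [4,8) -> N1
  int NumElts = 4;
  for (int &Idx : Mask) {
    if (Idx >= 0 && Idx < NumElts && N0Undef[Idx])
      Idx = -1; // lane reads an undef element of N0
    else if (Idx >= NumElts && N1Undef[Idx - NumElts])
      Idx = -1; // lane reads an undef element of N1
  }
  for (int M : Mask)
    std::printf("%d ", M); // 0 -1 -1 7
  std::printf("\n");
}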
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -15091,7 +15650,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
//
// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
// We don't fold shuffles where one side is a non-zero constant, and we don't
-// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
// non-constant operands. This seems to work out reasonably well in practice.
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
@@ -15103,6 +15662,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
if (!N0->hasOneUse() || !N1->hasOneUse())
return SDValue();
+
// If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
// discussed above.
if (!N1.isUndef()) {
@@ -15114,6 +15674,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
return SDValue();
}
+ // If both inputs are splats of the same value then we can safely merge this
+ // into a single BUILD_VECTOR with undef elements based on the shuffle mask.
+ bool IsSplat = false;
+ auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ if (BV0 && BV1)
+ if (SDValue Splat0 = BV0->getSplatValue())
+ IsSplat = (Splat0 == BV1->getSplatValue());
+
SmallVector<SDValue, 8> Ops;
SmallSet<SDValue, 16> DuplicateOps;
for (int M : SVN->getMask()) {
@@ -15124,23 +15693,25 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
if (S.getOpcode() == ISD::BUILD_VECTOR) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- if (Idx == 0)
- Op = S.getOperand(0);
+ assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
+ Op = S.getOperand(0);
} else {
// Operand can't be combined - bail out.
return SDValue();
}
}
- // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
- // fine, but it's likely to generate low-quality code if the target can't
- // reconstruct an appropriate shuffle.
+ // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
+ // generating a splat; semantically, this is fine, but it's likely to
+ // generate low-quality code if the target can't reconstruct an appropriate
+ // shuffle.
if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
- if (!DuplicateOps.insert(Op).second)
+ if (!IsSplat && !DuplicateOps.insert(Op).second)
return SDValue();
Ops.push_back(Op);
}
+
// BUILD_VECTOR requires all inputs to be of the same type, find the
// maximum type and extend them all.
EVT SVT = VT.getScalarType();
@@ -15162,7 +15733,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
SelectionDAG &DAG,
const TargetLowering &TLI,
- bool LegalOperations) {
+ bool LegalOperations,
+ bool LegalTypes) {
EVT VT = SVN->getValueType(0);
bool IsBigEndian = DAG.getDataLayout().isBigEndian();
@@ -15190,14 +15762,18 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
// Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
// power-of-2 extensions as they are the most likely.
for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
+ // Check for non-power-of-2 vector sizes.
+ if (NumElts % Scale != 0)
+ continue;
if (!isAnyExtend(Scale))
continue;
EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
- if (!LegalOperations ||
- TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
- return DAG.getBitcast(VT,
+ if (!LegalTypes || TLI.isTypeLegal(OutVT))
+ if (!LegalOperations ||
+ TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
+ return DAG.getBitcast(VT,
DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT));
}
@@ -15218,9 +15794,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
if (!VT.isInteger() || IsBigEndian)
return SDValue();
- SDValue N0 = SVN->getOperand(0);
- while (N0.getOpcode() == ISD::BITCAST)
- N0 = N0.getOperand(0);
+ SDValue N0 = peekThroughBitcast(SVN->getOperand(0));
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
@@ -15316,6 +15890,84 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
NewMask);
}
+/// If the shuffle mask is taking exactly one element from the first vector
+/// operand and passing through all other elements from the second vector
+/// operand, return the index of the mask element that is choosing an element
+/// from the first operand. Otherwise, return -1.
+static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
+ int MaskSize = Mask.size();
+ int EltFromOp0 = -1;
+ // TODO: This does not match if there are undef elements in the shuffle mask.
+ // Should we ignore undefs in the shuffle mask instead? The trade-off is
+ // removing an instruction (a shuffle), but losing the knowledge that some
+ // vector lanes are not needed.
+ for (int i = 0; i != MaskSize; ++i) {
+ if (Mask[i] >= 0 && Mask[i] < MaskSize) {
+ // We're looking for a shuffle of exactly one element from operand 0.
+ if (EltFromOp0 != -1)
+ return -1;
+ EltFromOp0 = i;
+ } else if (Mask[i] != i + MaskSize) {
+ // Nothing from operand 1 can change lanes.
+ return -1;
+ }
+ }
+ return EltFromOp0;
+}
+
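// A hedged re-derivation of the scan above for a plain int mask: return
// the mask position whose lane comes from operand 0, or -1 when the
// pattern does not match. The example mask is invented.
#include <cstdio>
#include <vector>

static int findSingleOp0Element(const std::vector<int> &Mask) {
  int MaskSize = (int)Mask.size(), EltFromOp0 = -1;
  for (int i = 0; i != MaskSize; ++i) {
    if (Mask[i] >= 0 && Mask[i] < MaskSize) {
      if (EltFromOp0 != -1)
        return -1; // more than one element taken from operand 0
      EltFromOp0 = i;
    } else if (Mask[i] != i + MaskSize) {
      return -1; // operand-1 lanes must pass through unchanged
    }
  }
  return EltFromOp0;
}

int main() {
  std::vector<int> Mask = {4, 1, 6, 7}; // only lane 1 comes from operand 0
  std::printf("%d\n", findSingleOp0Element(Mask)); // 1
}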
+/// If a shuffle inserts exactly one element from a source vector operand into
+/// another vector operand and we can access the specified element as a scalar,
+/// then we can eliminate the shuffle.
+static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ // First, check if we are taking one element of a vector and shuffling that
+ // element into another vector.
+ ArrayRef<int> Mask = Shuf->getMask();
+ SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
+ SDValue Op0 = Shuf->getOperand(0);
+ SDValue Op1 = Shuf->getOperand(1);
+ int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
+ if (ShufOp0Index == -1) {
+ // Commute mask and check again.
+ ShuffleVectorSDNode::commuteMask(CommutedMask);
+ ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
+ if (ShufOp0Index == -1)
+ return SDValue();
+ // Commute operands to match the commuted shuffle mask.
+ std::swap(Op0, Op1);
+ Mask = CommutedMask;
+ }
+
+ // The shuffle inserts exactly one element from operand 0 into operand 1.
+ // Now see if we can access that element as a scalar via a real insert element
+ // instruction.
+ // TODO: We can try harder to locate the element as a scalar. Examples: it
+ // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
+ assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
+ "Shuffle mask value must be from operand 0");
+ if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ return SDValue();
+
+ auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
+ if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
+ return SDValue();
+
+ // There's an existing insertelement with constant insertion index, so we
+ // don't need to check the legality/profitability of a replacement operation
+ // that differs at most in the constant value. The target should be able to
+ // lower any of those in a similar way. If not, legalization will expand this
+ // to a scalar-to-vector plus shuffle.
+ //
+ // Note that the shuffle may move the scalar from the position that the insert
+ // element used. Therefore, our new insert element occurs at the shuffle's
+ // mask index value, not the insert's index value.
+ // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
+ SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
+ Op0.getOperand(2).getValueType());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
+ Op1, Op0.getOperand(1), NewInsIndex);
+}
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -15362,6 +16014,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
+ // Simplify shuffle mask if a referenced element is UNDEF.
+ if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG))
+ return V;
+
+ if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
+ return InsElt;
+
// A shuffle of a single vector that is a splat can always be folded.
if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
if (N1->isUndef() && N0Shuf->isSplat())
@@ -15426,7 +16085,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return S;
// Match shuffles that can be converted to any_vector_extend_in_reg.
- if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
+ if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes))
return V;
// Combine "truncate_vector_in_reg" style shuffles.
@@ -15486,7 +16145,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (TLI.isTypeLegal(ScaleVT) &&
0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
-
int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
@@ -15661,23 +16319,46 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
EVT VT = N->getValueType(0);
// Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
- // with a VECTOR_SHUFFLE.
+ // with a VECTOR_SHUFFLE and possible truncate.
if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
SDValue InVec = InVal->getOperand(0);
SDValue EltNo = InVal->getOperand(1);
-
- // FIXME: We could support implicit truncation if the shuffle can be
- // scaled to a smaller vector scalar type.
- ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
- if (C0 && VT == InVec.getValueType() &&
- VT.getScalarType() == InVal.getValueType()) {
- SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+ auto InVecT = InVec.getValueType();
+ if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
+ SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
int Elt = C0->getZExtValue();
NewMask[0] = Elt;
-
- if (TLI.isShuffleMaskLegal(NewMask, VT))
- return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
- NewMask);
+ SDValue Val;
+ // If we have an implicit truncate, do the truncate here as long as the
+ // scalar type is legal. If it's not legal, no transform is applied here.
+ if (VT.getScalarType() != InVal.getValueType() &&
+ InVal.getValueType().isScalarInteger() &&
+ isTypeLegal(VT.getScalarType())) {
+ Val =
+ DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+ }
+ if (VT.getScalarType() == InVecT.getScalarType() &&
+ VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
+ TLI.isShuffleMaskLegal(NewMask, VT)) {
+ Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
+ DAG.getUNDEF(InVecT), NewMask);
+ // If the initial vector is the correct size, this shuffle is a
+ // valid result.
+ if (VT == InVecT)
+ return Val;
+ // If not, we must truncate the vector.
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+ EVT SubVT =
+ EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
+ VT.getVectorNumElements());
+ Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
+ ZeroIdx);
+ return Val;
+ }
+ }
}
}
@@ -15694,12 +16375,47 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N1.isUndef())
return N0;
+ // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
+ // us to pull BITCASTs from input to output.
+ if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
+ if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
+
// If this is an insert of an extracted vector into an undef vector, we can
// just use the input to the extract.
if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
return N1.getOperand(0);
+ // If we are inserting a bitcast value into an undef with the same
+ // number of elements, just use the bitcast input of the extract.
+ // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
+ // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
+ if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
+ N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getOperand(0).getOperand(1) == N2 &&
+ N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
+ VT.getVectorNumElements()) {
+ return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
+ }
+
+ // If both N0 and N1 are bitcast values on which insert_subvector
+ // would make sense, pull the bitcast through.
+ // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
+ // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
+ if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
+ SDValue CN0 = N0.getOperand(0);
+ SDValue CN1 = N1.getOperand(0);
+ if (CN0.getValueType().getVectorElementType() ==
+ CN1.getValueType().getVectorElementType() &&
+ CN0.getValueType().getVectorNumElements() ==
+ VT.getVectorNumElements()) {
+ SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
+ CN0.getValueType(), CN0, CN1, N2);
+ return DAG.getBitcast(VT, NewINSERT);
+ }
+ }
+
// Combine INSERT_SUBVECTORs where we are inserting to the same index.
// INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
// --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
@@ -15779,7 +16495,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
+ SDValue RHS = peekThroughBitcast(N->getOperand(1));
SDLoc DL(N);
// Make sure we're not running after operation legalization where it
@@ -15790,9 +16506,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (N->getOpcode() != ISD::AND)
return SDValue();
- if (RHS.getOpcode() == ISD::BITCAST)
- RHS = RHS.getOperand(0);
-
if (RHS.getOpcode() != ISD::BUILD_VECTOR)
return SDValue();
@@ -15945,7 +16658,6 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
/// the DAG combiner loop to avoid it being looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
-
// fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
// The select + setcc is redundant, because fsqrt returns NaN for X < 0.
if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
@@ -16418,7 +17130,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@@ -16429,7 +17141,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode*> Built;
+ std::vector<SDNode *> Built;
SDValue S =
TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
@@ -16464,7 +17176,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction()->optForMinSize())
+ if (DAG.getMachineFunction().getFunction().optForMinSize())
return SDValue();
ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
@@ -16475,7 +17187,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
if (C->isNullValue())
return SDValue();
- std::vector<SDNode*> Built;
+ std::vector<SDNode *> Built;
SDValue S =
TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
@@ -16760,8 +17472,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
if (Op1->isInvariant() && Op0->writeMem())
return false;
- unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3;
- unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3;
+ unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
+ unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
// Check for BaseIndexOffset matching.
BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
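// A small hedged check of why getStoreSize() replaces the '>> 3' shift:
// for a hypothetical 1-bit memory type the shift rounds down to 0 bytes,
// while the store size rounds up to a whole byte.
#include <cstdio>

int main() {
  unsigned SizeInBits = 1;
  unsigned OldBytes = SizeInBits >> 3;        // 0 -- underestimates
  unsigned StoreBytes = (SizeInBits + 7) / 8; // 1 -- what getStoreSize() models
  std::printf("old = %u, store = %u\n", OldBytes, StoreBytes);
}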
@@ -16957,7 +17669,11 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
- SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor.
+ if (OptLevel == CodeGenOpt::None)
+ return OldChain;
+
+ // Ops for replacing token factor.
+ SmallVector<SDValue, 8> Aliases;
// Accumulate all the aliases to this node.
GatherAllAliases(N, OldChain, Aliases);
@@ -16987,6 +17703,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
+ if (OptLevel == CodeGenOpt::None)
+ return false;
+
// This holds the base pointer, index, and the offset in bytes from the base
// pointer.
BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index b2599b2e17f10..d3c94b5f9e6b4 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -63,6 +63,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
@@ -98,11 +101,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -168,8 +168,7 @@ bool FastISel::hasTrivialKill(const Value *V) {
// No-op casts are trivially coalesced by fast-isel.
if (const auto *Cast = dyn_cast<CastInst>(I))
- if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) &&
- !hasTrivialKill(Cast->getOperand(0)))
+ if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0)))
return false;
// Even though the value might have only one use in the LLVM IR, it is possible that
@@ -1133,6 +1132,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::lifetime_end:
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
+ // Neither does the sideeffect intrinsic.
+ case Intrinsic::sideeffect:
// Neither does the assume intrinsic; it's also OK not to codegen its operand.
case Intrinsic::assume:
return true;
@@ -1187,7 +1188,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
// into an indirect DBG_VALUE.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
- Op->getReg(), 0, DI->getVariable(), DI->getExpression());
+ Op->getReg(), DI->getVariable(), DI->getExpression());
} else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::DBG_VALUE))
@@ -1212,35 +1213,32 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(0U)
- .addImm(DI->getOffset())
- .addMetadata(DI->getVariable())
- .addMetadata(DI->getExpression());
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U,
+ DI->getVariable(), DI->getExpression());
} else if (const auto *CI = dyn_cast<ConstantInt>(V)) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
- .addImm(DI->getOffset())
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
- .addImm(DI->getOffset())
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
- .addImm(DI->getOffset())
+ .addImm(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (unsigned Reg = lookUpRegForValue(V)) {
// FIXME: This does not handle register-indirect values at offset 0.
- bool IsIndirect = DI->getOffset() != 0;
+ bool IsIndirect = false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg,
- DI->getOffset(), DI->getVariable(), DI->getExpression());
+ DI->getVariable(), DI->getExpression());
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b736037d71ddc..c7cdb49203b15 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -17,11 +17,14 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -32,12 +35,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index b96c96f0b4df4..cc9b41b4b487f 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -21,14 +21,14 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "instr-emitter"
@@ -673,7 +673,6 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap) {
- uint64_t Offset = SD->getOffset();
MDNode *Var = SD->getVariable();
MDNode *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
@@ -685,7 +684,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SD->getFrameIx())
- .addImm(Offset)
+ .addImm(0)
.addMetadata(Var)
.addMetadata(Expr);
}
@@ -727,11 +726,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// Indirect addressing is indicated by an Imm as the second parameter.
if (SD->isIndirect())
- MIB.addImm(Offset);
- else {
- assert(Offset == 0 && "direct value cannot have an offset");
+ MIB.addImm(0U);
+ else
MIB.addReg(0U, RegState::Debug);
- }
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
@@ -938,10 +935,14 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap);
break;
}
- case ISD::EH_LABEL: {
- MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel();
+ case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL: {
+ unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL)
+ ? TargetOpcode::EH_LABEL
+ : TargetOpcode::ANNOTATION_LABEL;
+ MCSymbol *S = cast<LabelSDNode>(Node)->getLabel();
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
- TII->get(TargetOpcode::EH_LABEL)).addSym(S);
+ TII->get(Opc)).addSym(S);
break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 7e4bc3ccb5d39..bb1dc17b7a1b0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1,4 +1,4 @@
-//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===//
+//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,37 +11,65 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetOptions.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "legalizedag"
namespace {
-struct FloatSignAsInt;
+/// Keeps track of state when getting the sign of a floating-point value as an
+/// integer.
+struct FloatSignAsInt {
+ EVT FloatVT;
+ SDValue Chain;
+ SDValue FloatPtr;
+ SDValue IntPtr;
+ MachinePointerInfo IntPointerInfo;
+ MachinePointerInfo FloatPointerInfo;
+ SDValue IntValue;
+ APInt SignMask;
+ uint8_t SignBit;
+};
//===----------------------------------------------------------------------===//
/// This takes an arbitrary SelectionDAG as input and
@@ -54,7 +82,6 @@ struct FloatSignAsInt;
/// as part of its processing. For example, if a target does not support a
/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this
/// will attempt to merge setcc and brc instructions into brcc's.
-///
class SelectionDAGLegalize {
const TargetMachine &TM;
const TargetLowering &TLI;
@@ -165,11 +192,13 @@ private:
public:
// Node replacement helpers
+
void ReplacedNode(SDNode *N) {
LegalizedNodes.erase(N);
if (UpdatedNodes)
UpdatedNodes->insert(N);
}
+
void ReplaceNode(SDNode *Old, SDNode *New) {
DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
dbgs() << " with: "; New->dump(&DAG));
@@ -182,6 +211,7 @@ public:
UpdatedNodes->insert(New);
ReplacedNode(Old);
}
+
void ReplaceNode(SDValue Old, SDValue New) {
DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
dbgs() << " with: "; New->dump(&DAG));
@@ -191,6 +221,7 @@ public:
UpdatedNodes->insert(New.getNode());
ReplacedNode(Old.getNode());
}
+
void ReplaceNode(SDNode *Old, const SDValue *New) {
DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG));
@@ -205,7 +236,8 @@ public:
ReplacedNode(Old);
}
};
-}
+
+} // end anonymous namespace
/// Return a vector shuffle operation which
/// performs the same shuffe in terms of order or result bytes, but on a type
@@ -376,6 +408,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val,
}
SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
+ DEBUG(dbgs() << "Optimizing float store operations\n");
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
// FIXME: We shouldn't do this for TargetConstantFP's.
// FIXME: move this to the DAG Combiner! Note that we can't regress due
@@ -434,172 +467,184 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
}
void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
- StoreSDNode *ST = cast<StoreSDNode>(Node);
- SDValue Chain = ST->getChain();
- SDValue Ptr = ST->getBasePtr();
- SDLoc dl(Node);
-
- unsigned Alignment = ST->getAlignment();
- MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
- AAMDNodes AAInfo = ST->getAAInfo();
-
- if (!ST->isTruncatingStore()) {
- if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
- ReplaceNode(ST, OptStore);
- return;
- }
+ StoreSDNode *ST = cast<StoreSDNode>(Node);
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ SDLoc dl(Node);
- {
- SDValue Value = ST->getValue();
- MVT VT = Value.getSimpleValueType();
- switch (TLI.getOperationAction(ISD::STORE, VT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal: {
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- EVT MemVT = ST->getMemoryVT();
- unsigned AS = ST->getAddressSpace();
- unsigned Align = ST->getAlignment();
- const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
- SDValue Result = TLI.expandUnalignedStore(ST, DAG);
- ReplaceNode(SDValue(ST, 0), Result);
- }
- break;
- }
- case TargetLowering::Custom: {
- SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res && Res != SDValue(Node, 0))
- ReplaceNode(SDValue(Node, 0), Res);
- return;
- }
- case TargetLowering::Promote: {
- MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
- assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
- "Can only promote stores to same size type");
- Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
- SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
- }
- return;
- }
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ if (!ST->isTruncatingStore()) {
+ DEBUG(dbgs() << "Legalizing store operation\n");
+ if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
+ ReplaceNode(ST, OptStore);
+ return;
+ }
+
+ SDValue Value = ST->getValue();
+ MVT VT = Value.getSimpleValueType();
+ switch (TLI.getOperationAction(ISD::STORE, VT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ EVT MemVT = ST->getMemoryVT();
+ unsigned AS = ST->getAddressSpace();
+ unsigned Align = ST->getAlignment();
+ const DataLayout &DL = DAG.getDataLayout();
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ } else
+ DEBUG(dbgs() << "Legal store\n");
+ break;
+ }
+ case TargetLowering::Custom: {
+ DEBUG(dbgs() << "Trying custom lowering\n");
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Promote: {
+ MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT);
+ assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
+ "Can only promote stores to same size type");
+ Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value);
+ SDValue Result =
+ DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
+ }
+ }
+ return;
+ }
+
+ DEBUG(dbgs() << "Legalizing truncating store operations\n");
+ SDValue Value = ST->getValue();
+ EVT StVT = ST->getMemoryVT();
+ unsigned StWidth = StVT.getSizeInBits();
+ auto &DL = DAG.getDataLayout();
+
+ if (StWidth != StVT.getStoreSizeInBits()) {
+ // Promote to a byte-sized store with upper bits zero if not
+ // storing an integral number of bytes. For example, promote
+ // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
+ StVT.getStoreSizeInBits());
+ Value = DAG.getZeroExtendInReg(Value, dl, StVT);
+ SDValue Result =
+ DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
+ Alignment, MMOFlags, AAInfo);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else if (StWidth & (StWidth - 1)) {
+ // If not storing a power-of-2 number of bits, expand as two stores.
+ assert(!StVT.isVector() && "Unsupported truncstore!");
+ unsigned RoundWidth = 1 << Log2_32(StWidth);
+ assert(RoundWidth < StWidth);
+ unsigned ExtraWidth = StWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Store size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi;
+ unsigned IncrementSize;
+
+ if (DL.isLittleEndian()) {
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
+ // Store the bottom RoundWidth bits.
+ Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ RoundVT, Alignment, MMOFlags, AAInfo);
+
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(RoundWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(
+ Chain, dl, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
} else {
- SDValue Value = ST->getValue();
-
- EVT StVT = ST->getMemoryVT();
- unsigned StWidth = StVT.getSizeInBits();
- auto &DL = DAG.getDataLayout();
-
- if (StWidth != StVT.getStoreSizeInBits()) {
- // Promote to a byte-sized store with upper bits zero if not
- // storing an integral number of bytes. For example, promote
- // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
- StVT.getStoreSizeInBits());
- Value = DAG.getZeroExtendInReg(Value, dl, StVT);
- SDValue Result =
- DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT,
- Alignment, MMOFlags, AAInfo);
- ReplaceNode(SDValue(Node, 0), Result);
- } else if (StWidth & (StWidth - 1)) {
- // If not storing a power-of-2 number of bits, expand as two stores.
- assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned RoundWidth = 1 << Log2_32(StWidth);
- assert(RoundWidth < StWidth);
- unsigned ExtraWidth = StWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Store size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi;
- unsigned IncrementSize;
-
- if (DL.isLittleEndian()) {
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
- // Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- RoundVT, Alignment, MMOFlags, AAInfo);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
- Hi = DAG.getNode(
- ISD::SRL, dl, Value.getValueType(), Value,
- DAG.getConstant(RoundWidth, dl,
- TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(
- Chain, dl, Hi, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
- } else {
- // Big endian - avoid unaligned stores.
- // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
- // Store the top RoundWidth bits.
- Hi = DAG.getNode(
- ISD::SRL, dl, Value.getValueType(), Value,
- DAG.getConstant(ExtraWidth, dl,
- TLI.getShiftAmountTy(Value.getValueType(), DL)));
- Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
- RoundVT, Alignment, MMOFlags, AAInfo);
-
- // Store the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl,
- Ptr.getValueType()));
- Lo = DAG.getTruncStore(
- Chain, dl, Value, Ptr,
- ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
- MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
- }
+ // Big endian - avoid unaligned stores.
+ // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
+ // Store the top RoundWidth bits.
+ Hi = DAG.getNode(
+ ISD::SRL, dl, Value.getValueType(), Value,
+ DAG.getConstant(ExtraWidth, dl,
+ TLI.getShiftAmountTy(Value.getValueType(), DL)));
+ Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
+ RoundVT, Alignment, MMOFlags, AAInfo);
- // The order of the stores doesn't matter.
- SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- ReplaceNode(SDValue(Node, 0), Result);
+ // Store the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getConstant(IncrementSize, dl,
+ Ptr.getValueType()));
+ Lo = DAG.getTruncStore(
+ Chain, dl, Value, Ptr,
+ ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT,
+ MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
+ }
+
+ // The order of the stores doesn't matter.
+ SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
+ ReplaceNode(SDValue(Node, 0), Result);
+ } else {
+ switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
+ default: llvm_unreachable("This action is not supported yet!");
+ case TargetLowering::Legal: {
+ EVT MemVT = ST->getMemoryVT();
+ unsigned AS = ST->getAddressSpace();
+ unsigned Align = ST->getAlignment();
+ // If this is an unaligned store and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ SDValue Result = TLI.expandUnalignedStore(ST, DAG);
+ ReplaceNode(SDValue(ST, 0), Result);
+ }
+ break;
+ }
+ case TargetLowering::Custom: {
+ SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
+ if (Res && Res != SDValue(Node, 0))
+ ReplaceNode(SDValue(Node, 0), Res);
+ return;
+ }
+ case TargetLowering::Expand:
+ assert(!StVT.isVector() &&
+ "Vector Stores are handled in LegalizeVectorOps");
+
+ SDValue Result;
+
+ // TRUNCSTORE:i16 i32 -> STORE i16
+ if (TLI.isTypeLegal(StVT)) {
+ Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
+ Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ Alignment, MMOFlags, AAInfo);
} else {
- switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) {
- default: llvm_unreachable("This action is not supported yet!");
- case TargetLowering::Legal: {
- EVT MemVT = ST->getMemoryVT();
- unsigned AS = ST->getAddressSpace();
- unsigned Align = ST->getAlignment();
- // If this is an unaligned store and the target doesn't support it,
- // expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
- SDValue Result = TLI.expandUnalignedStore(ST, DAG);
- ReplaceNode(SDValue(ST, 0), Result);
- }
- break;
- }
- case TargetLowering::Custom: {
- SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
- if (Res && Res != SDValue(Node, 0))
- ReplaceNode(SDValue(Node, 0), Res);
- return;
- }
- case TargetLowering::Expand:
- assert(!StVT.isVector() &&
- "Vector Stores are handled in LegalizeVectorOps");
-
- // TRUNCSTORE:i16 i32 -> STORE i16
- assert(TLI.isTypeLegal(StVT) &&
- "Do not know how to expand this store!");
- Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
- SDValue Result =
- DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
- Alignment, MMOFlags, AAInfo);
- ReplaceNode(SDValue(Node, 0), Result);
- break;
- }
+ // The in-memory type isn't legal. Truncate to the type it would promote
+ // to, and then do a truncstore.
+ Value = DAG.getNode(ISD::TRUNCATE, dl,
+ TLI.getTypeToTransformTo(*DAG.getContext(), StVT),
+ Value);
+ Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
+ StVT, Alignment, MMOFlags, AAInfo);
}
+
+ ReplaceNode(SDValue(Node, 0), Result);
+ break;
}
+ }
}
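
The two transformations sketched in the comments above (i1 to i8, and the i24 RoundWidth/ExtraWidth split) are easiest to follow as plain byte arithmetic. A minimal host-C++ sketch of what the emitted nodes compute; illustrative only, not part of the patch, and assuming a little-endian host for the i24 case:

#include <cstdint>
#include <cstring>

// TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1): zero the upper bits so the
// byte-sized store holds exactly the stored value.
void storeI1(uint8_t *Ptr, uint8_t X) { *Ptr = X & 1; }

// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16),
// with RoundWidth = 16, ExtraWidth = 8 and IncrementSize = 2.
void storeI24(uint8_t *Ptr, uint32_t X) {
  uint16_t Lo = static_cast<uint16_t>(X);      // bottom RoundWidth bits
  uint8_t  Hi = static_cast<uint8_t>(X >> 16); // remaining ExtraWidth bits
  std::memcpy(Ptr, &Lo, sizeof(Lo));           // store at offset 0
  Ptr[2] = Hi;                                 // store at offset IncrementSize
}
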
void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
@@ -611,6 +656,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
ISD::LoadExtType ExtType = LD->getExtensionType();
if (ExtType == ISD::NON_EXTLOAD) {
+ DEBUG(dbgs() << "Legalizing non-extending load operation\n");
MVT VT = Node->getSimpleValueType(0);
SDValue RVal = SDValue(Node, 0);
SDValue RChain = SDValue(Node, 1);
@@ -629,13 +675,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
break;
}
- case TargetLowering::Custom: {
+ case TargetLowering::Custom:
if (SDValue Res = TLI.LowerOperation(RVal, DAG)) {
RVal = Res;
RChain = Res.getValue(1);
}
break;
- }
+
case TargetLowering::Promote: {
MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
@@ -660,6 +706,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
return;
}
+ DEBUG(dbgs() << "Legalizing extending load operation\n");
EVT SrcVT = LD->getMemoryVT();
unsigned SrcWidth = SrcVT.getSizeInBits();
unsigned Alignment = LD->getAlignment();
@@ -795,7 +842,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
case TargetLowering::Custom:
isCustom = true;
LLVM_FALLTHROUGH;
- case TargetLowering::Legal: {
+ case TargetLowering::Legal:
Value = SDValue(Node, 0);
Chain = SDValue(Node, 1);
@@ -816,8 +863,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
}
}
break;
- }
- case TargetLowering::Expand:
+
+ case TargetLowering::Expand: {
EVT DestVT = Node->getValueType(0);
if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) {
// If the source type is not legal, see if there is a legal extload to
@@ -883,6 +930,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
Chain = Result.getValue(1);
break;
}
+ }
}
// Since loads produce two values, make sure to remember that we legalized
@@ -907,6 +955,7 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
@@ -932,7 +981,9 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) {
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
- if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
+ // Allow illegal target nodes and illegal registers.
+ if (Node->getOpcode() == ISD::TargetConstant ||
+ Node->getOpcode() == ISD::Register)
return;
#ifndef NDEBUG
@@ -946,7 +997,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
TargetLowering::TypeLegal ||
TLI.isTypeLegal(Op.getValueType()) ||
- Op.getOpcode() == ISD::TargetConstant) &&
+ Op.getOpcode() == ISD::TargetConstant ||
+ Op.getOpcode() == ISD::Register) &&
"Unexpected illegal type!");
#endif
@@ -983,11 +1035,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
break;
}
- case ISD::ATOMIC_STORE: {
+ case ISD::ATOMIC_STORE:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(2).getValueType());
break;
- }
case ISD::SELECT_CC:
case ISD::SETCC:
case ISD::BR_CC: {
@@ -1072,6 +1123,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
break;
case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
case ISD::STRICT_FSIN:
@@ -1090,7 +1142,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(),
Node->getValueType(0));
break;
-
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1141,8 +1192,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
if (SAO != Op2)
NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO);
}
+ break;
}
- break;
}
if (NewNode != Node) {
@@ -1151,8 +1202,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
switch (Action) {
case TargetLowering::Legal:
+ DEBUG(dbgs() << "Legal node: nothing to do\n");
return;
- case TargetLowering::Custom: {
+ case TargetLowering::Custom:
+ DEBUG(dbgs() << "Trying custom legalization\n");
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
@@ -1160,6 +1213,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
if (Node->getNumValues() == 1) {
+ DEBUG(dbgs() << "Successfully custom legalized node\n");
// We can just directly replace this node with the lowered value.
ReplaceNode(SDValue(Node, 0), Res);
return;
@@ -1168,11 +1222,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
SmallVector<SDValue, 8> ResultVals;
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
ResultVals.push_back(Res.getValue(i));
+ DEBUG(dbgs() << "Successfully custom legalized node\n");
ReplaceNode(Node, ResultVals.data());
return;
}
+ DEBUG(dbgs() << "Could not custom legalize node\n");
LLVM_FALLTHROUGH;
- }
case TargetLowering::Expand:
if (ExpandNode(Node))
return;
@@ -1198,13 +1253,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::CALLSEQ_START:
case ISD::CALLSEQ_END:
break;
- case ISD::LOAD: {
+ case ISD::LOAD:
return LegalizeLoadOps(Node);
- }
- case ISD::STORE: {
+ case ISD::STORE:
return LegalizeStoreOps(Node);
}
- }
}
SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
@@ -1240,7 +1293,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
// we will make the index dependent on the load). Also, the store might be
- // dependent on the extractelement and introduce a cycle when creating
+ // dependent on the extractelement and introduce a cycle when creating
// the load.
if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
ST->hasPredecessor(Op.getNode()))
@@ -1361,22 +1414,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo);
}
-namespace {
-/// Keeps track of state when getting the sign of a floating-point value as an
-/// integer.
-struct FloatSignAsInt {
- EVT FloatVT;
- SDValue Chain;
- SDValue FloatPtr;
- SDValue IntPtr;
- MachinePointerInfo IntPointerInfo;
- MachinePointerInfo FloatPointerInfo;
- SDValue IntValue;
- APInt SignMask;
- uint8_t SignBit;
-};
-}
-
/// Bitcast a floating-point value to an integer value. Only bitcast the part
/// containing the sign bit if the target has no integer value capable of
/// holding all bits of the floating-point value.
@@ -1753,8 +1790,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG,
// We do this in two phases; first to check the legality of the shuffles,
// and next, assuming that all shuffles are legal, to create the new nodes.
for (int Phase = 0; Phase < 2; ++Phase) {
- SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals,
- NewIntermedVals;
+ SmallVector<std::pair<SDValue, SmallVector<int, 16>>, 16> IntermedVals,
+ NewIntermedVals;
for (unsigned i = 0; i < NumElems; ++i) {
SDValue V = Node->getOperand(i);
if (V.isUndef())
@@ -1977,10 +2014,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
// isTailCall may be true since the callee does not reference caller stack
// frame. Check if it's in the right position and that the return types match.
SDValue TCChain = InChain;
- const Function *F = DAG.getMachineFunction().getFunction();
+ const Function &F = DAG.getMachineFunction().getFunction();
bool isTailCall =
TLI.isInTailCallPosition(DAG, Node, TCChain) &&
- (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy());
+ (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
if (isTailCall)
InChain = TCChain;
@@ -1996,10 +2033,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
- if (!CallInfo.second.getNode())
+ if (!CallInfo.second.getNode()) {
+ DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
+ }
+ DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
return CallInfo.first;
}
@@ -2285,9 +2325,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
EVT DestVT,
const SDLoc &dl) {
// TODO: Should any fast-math-flags be set for the created nodes?
-
+ DEBUG(dbgs() << "Legalizing INT_TO_FP\n");
if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
- // simple 32-bit [signed|unsigned] integer to float/double expansion
+ DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double "
+ "expansion\n");
// Get the stack frame index of a 8 byte buffer.
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);
@@ -2352,6 +2393,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
// and in all alternate rounding modes.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) {
+ DEBUG(dbgs() << "Converting unsigned i64 to f64\n");
SDValue TwoP52 =
DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64);
SDValue TwoP84PlusTwoP52 =
@@ -2372,9 +2414,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
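
The magic constants above implement the classic bias trick for converting an unsigned i64 to f64. A host-C++ sketch of the value the emitted nodes compute (illustrative only, not part of the patch; assumes IEEE-754 doubles):

#include <cstdint>
#include <cstring>

double uint64ToF64(uint64_t X) {
  // OR each 32-bit half into the mantissa of a biased double. Both results
  // are exactly representable, so no rounding has happened yet.
  uint64_t LoBits = (X & 0xFFFFFFFFu) | UINT64_C(0x4330000000000000);
  uint64_t HiBits = (X >> 32) | UINT64_C(0x4530000000000000);
  double LoFlt, HiFlt;                       // 2^52 + lo, 2^84 + hi * 2^32
  std::memcpy(&LoFlt, &LoBits, sizeof(double));
  std::memcpy(&HiFlt, &HiBits, sizeof(double));
  const double TwoP84PlusTwoP52 = 19342813118337666422669312.0;
  // The FSUB cancels both biases exactly; the final FADD is the only
  // rounding step, so X is rounded to double precision just once.
  return LoFlt + (HiFlt - TwoP84PlusTwoP52);
}
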
- // Implementation of unsigned i64 to f32.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ DEBUG(dbgs() << "Converting unsigned i64 to f32\n");
// For unsigned conversions, convert them to signed conversions using the
// algorithm from the x86_64 __floatundidf in compiler_rt.
if (!isSigned) {
@@ -2498,7 +2540,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT,
unsigned OpToUse = 0;
// Scan for the appropriate larger type to use.
- while (1) {
+ while (true) {
NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1);
assert(NewInTy.isInteger() && "Ran out of possibilities!");
@@ -2539,7 +2581,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
unsigned OpToUse = 0;
// Scan for the appropriate larger type to use.
- while (1) {
+ while (true) {
NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1);
assert(NewOutTy.isInteger() && "Ran out of possibilities!");
@@ -2559,7 +2601,6 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
// Otherwise, try a larger type.
}
-
// Okay, we found the operation and type to use.
SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp);
@@ -2745,7 +2786,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
EVT VT = Op.getValueType();
- unsigned len = VT.getSizeInBits();
+ unsigned Len = VT.getSizeInBits();
if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
EVT SetCCVT = getSetCCResultType(VT);
@@ -2753,7 +2794,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
- DAG.getConstant(len, dl, VT), CTLZ);
+ DAG.getConstant(Len, dl, VT), CTLZ);
}
// for now, we do this:
@@ -2766,7 +2807,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
//
// Ref: "Hacker's Delight" by Henry Warren
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
+ for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) {
SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3));
@@ -2778,11 +2819,22 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// This trivially expands to CTTZ.
return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op);
case ISD::CTTZ: {
+ EVT VT = Op.getValueType();
+ unsigned Len = VT.getSizeInBits();
+
+ if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT = getSetCCResultType(VT);
+ SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(Len, dl, VT), CTTZ);
+ }
+
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
// { return 32 - nlz(~x & (x-1)); }
// Ref: "Hacker's Delight" by Henry Warren
- EVT VT = Op.getValueType();
SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT,
DAG.getNOT(dl, Op, VT),
DAG.getNode(ISD::SUB, dl, VT, Op,
@@ -2799,6 +2851,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
}
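
Both bit-count expansions above are one-liners in scalar code. A host-C++ sketch with worked examples (illustrative, not part of the patch; the GCC/Clang __builtin_popcount stands in for the CTPOP node):

#include <cstdint>

int cttz32(uint32_t X) {
  // ~X & (X - 1) isolates the bits below the lowest set bit. For example,
  // X = 0b01101000: X - 1 = 0b01100111, ~X & (X - 1) = 0b00000111, and
  // popcount = 3, the number of trailing zeros. X == 0 gives all-ones -> 32.
  return __builtin_popcount(~X & (X - 1));
}

int ctlz32(uint32_t X) {
  // Smear the highest set bit into every lower position (the shift-or loop
  // above), then the leading zero count is 32 minus the popcount.
  for (unsigned Shift = 1; Shift <= 16; Shift *= 2)
    X |= X >> Shift;
  return 32 - __builtin_popcount(X);
}
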
bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
+ DEBUG(dbgs() << "Trying to expand node\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
SDValue Tmp1, Tmp2, Tmp3, Tmp4;
@@ -2983,8 +3036,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// NOTE: we could fall back on load/store here too for targets without
// SRA. However, it is doubtful that any exist.
EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- if (VT.isVector())
- ShiftAmountTy = VT;
unsigned BitsDiff = VT.getScalarSizeInBits() -
ExtraVT.getScalarSizeInBits();
SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
@@ -3062,10 +3113,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::INSERT_SUBVECTOR:
Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
break;
- case ISD::CONCAT_VECTORS: {
+ case ISD::CONCAT_VECTORS:
Results.push_back(ExpandVectorBuildThroughStack(Node));
break;
- }
case ISD::SCALAR_TO_VECTOR:
Results.push_back(ExpandSCALAR_TO_VECTOR(Node));
break;
@@ -3083,14 +3133,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
if (!TLI.isTypeLegal(EltVT)) {
-
EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
// BUILD_VECTOR operands are allowed to be wider than the element type.
// But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept
// it.
if (NewEltVT.bitsLT(EltVT)) {
-
// Convert shuffle node.
// If original node was v4i64 and the new EltVT is i32,
// cast operands to v8i32 and re-build the mask.
@@ -3261,6 +3309,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
case ISD::FP_TO_FP16:
+ DEBUG(dbgs() << "Legalizing FP_TO_FP16\n");
if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) {
SDValue Op = Node->getOperand(0);
MVT SVT = Op.getSimpleValueType();
@@ -3457,7 +3506,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
// Sub:
// Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- //
SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
@@ -3666,10 +3714,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp2.getOperand(0), Tmp2.getOperand(1),
Node->getOperand(2));
} else {
- // We test only the i1 bit. Skip the AND if UNDEF.
- Tmp3 = (Tmp2.isUndef()) ? Tmp2 :
- DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
- DAG.getConstant(1, dl, Tmp2.getValueType()));
+ // We test only the i1 bit. Skip the AND if UNDEF or already an AND with 1.
+ if (Tmp2.isUndef() ||
+ (Tmp2.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Tmp2.getOperand(1)) &&
+ cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1))
+ Tmp3 = Tmp2;
+ else
+ Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getConstant(1, dl, Tmp2.getValueType()));
Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1,
DAG.getCondCode(ISD::SETNE), Tmp3,
DAG.getConstant(0, dl, Tmp3.getValueType()),
@@ -3865,17 +3918,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
// Replace the original node with the legalized result.
- if (Results.empty())
+ if (Results.empty()) {
+ DEBUG(dbgs() << "Cannot expand node\n");
return false;
+ }
+ DEBUG(dbgs() << "Succesfully expanded node\n");
ReplaceNode(Node, Results.data());
return true;
}
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ DEBUG(dbgs() << "Trying to convert node to libcall\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
- SDValue Tmp1, Tmp2, Tmp3, Tmp4;
unsigned Opc = Node->getOpcode();
switch (Opc) {
case ISD::ATOMIC_FENCE: {
@@ -4057,6 +4113,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::REM_PPCF128));
break;
case ISD::FMA:
+ case ISD::STRICT_FMA:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
@@ -4126,8 +4183,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
}
// Replace the original node with the legalized result.
- if (!Results.empty())
+ if (!Results.empty()) {
+ DEBUG(dbgs() << "Successfully converted node to libcall\n");
ReplaceNode(Node, Results.data());
+ } else
+ DEBUG(dbgs() << "Could not convert node to libcall\n");
}
// Determine the vector type to use in place of an original scalar element when
@@ -4141,6 +4201,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI,
}
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ DEBUG(dbgs() << "Trying to promote node\n");
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
@@ -4369,7 +4430,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FREM:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FPOW: {
+ case ISD::FPOW:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
@@ -4377,8 +4438,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp3, DAG.getIntPtrConstant(0, dl)));
break;
- }
- case ISD::FMA: {
+ case ISD::FMA:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
@@ -4387,7 +4447,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
DAG.getIntPtrConstant(0, dl)));
break;
- }
case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
@@ -4419,13 +4478,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FLOG10:
case ISD::FABS:
case ISD::FEXP:
- case ISD::FEXP2: {
+ case ISD::FEXP2:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT,
Tmp2, DAG.getIntPtrConstant(0, dl)));
break;
- }
case ISD::BUILD_VECTOR: {
MVT EltVT = OVT.getVectorElementType();
MVT NewEltVT = NVT.getVectorElementType();
@@ -4579,8 +4637,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
// Replace the original node with the legalized result.
- if (!Results.empty())
+ if (!Results.empty()) {
+ DEBUG(dbgs() << "Successfully promoted node\n");
ReplaceNode(Node, Results.data());
+ } else
+ DEBUG(dbgs() << "Could not promote node\n");
}
/// This is the entry point for the file.
@@ -4602,7 +4663,7 @@ void SelectionDAG::Legalize() {
// nodes with their original operands intact. Legalization can produce
// new nodes which may themselves need to be legalized. Iterate until all
// nodes have been legalized.
- for (;;) {
+ while (true) {
bool AnyLegalized = false;
for (auto NI = allnodes_end(); NI != allnodes_begin();) {
--NI;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 75fec7bd1d485..29f0bb475b083 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -40,8 +40,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
- if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ DEBUG(dbgs() << "Node has been custom expanded, done\n");
return;
+ }
switch (N->getOpcode()) {
default:
@@ -568,10 +570,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
SDValue Mask = N->getOperand(0);
- EVT OpTy = N->getOperand(1).getValueType();
- // Promote all the way up to the canonical SetCC type.
- Mask = PromoteTargetBoolean(Mask, OpTy);
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, SDLoc(N),
@@ -773,7 +772,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
- llvm_unreachable("Not implemented");
+
+ // We need to sign-extend the operands so the carry value computed by the
+ // wide operation will be equivalent to the carry value computed by the
+ // narrow operation.
+ // An ADDCARRY can generate carry only if any of the operands has its
+ // most significant bit set. Sign extension propagates the most significant
+ // bit into the higher bits which means the extra bit that the narrow
+ // addition would need (i.e. the carry) will be propagated through the higher
+ // bits of the wide addition.
+ // A SUBCARRY can generate borrow only if LHS < RHS and this property will be
+ // preserved by sign extension.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+
+ EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)};
+
+ // Do the arithmetic in the wide type.
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs),
+ LHS, RHS, N->getOperand(2));
+
+ // Update the users of the original carry/borrow value.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ return SDValue(Res.getNode(), 0);
}
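
The sign-extension argument above can be checked exhaustively at a small width. An illustrative host-C++ sketch (not part of the patch) comparing an i8 addcarry against the same operation done in i32 on sign-extended operands:

#include <cassert>
#include <cstdint>

// Narrow reference: i8 add with carry-in, returning the carry out of bit 7.
bool addCarry8(uint8_t A, uint8_t B, bool CarryIn, uint8_t &Sum) {
  unsigned Wide = unsigned(A) + unsigned(B) + CarryIn;
  Sum = uint8_t(Wide);
  return Wide > 0xFF;
}

// Promoted form: sign-extend both operands to i32, add, and take the carry
// out of bit 31 of the wide addition.
bool addCarry8ViaI32(uint8_t A, uint8_t B, bool CarryIn, uint8_t &Sum) {
  uint64_t Wide = uint64_t(uint32_t(int32_t(int8_t(A)))) +
                  uint64_t(uint32_t(int32_t(int8_t(B)))) + CarryIn;
  Sum = uint8_t(Wide);
  return (Wide >> 32) & 1;
}

int main() {
  for (unsigned A = 0; A < 256; ++A)
    for (unsigned B = 0; B < 256; ++B)
      for (unsigned C = 0; C < 2; ++C) {
        uint8_t S1, S2;
        bool C1 = addCarry8(A, B, C, S1);
        bool C2 = addCarry8ViaI32(A, B, C, S2);
        assert(S1 == S2 && C1 == C2); // sums and carries always agree
      }
}
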
SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
@@ -885,8 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
- if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
+ }
switch (N->getOpcode()) {
default:
@@ -1206,24 +1230,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
 // When the data operand has an illegal type, we should legalize the data
 // operand first. The mask will be promoted/split/widened according to
// the data operand type.
- if (TLI.isTypeLegal(DataVT))
+ if (TLI.isTypeLegal(DataVT)) {
Mask = PromoteTargetBoolean(Mask, DataVT);
- else {
- if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
- return PromoteIntOp_MSTORE(N, 3);
-
- else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
- return WidenVecOp_MSTORE(N, 3);
-
- else {
- assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
- return SplitVecOp_MSTORE(N, 3);
- }
+ // Update in place.
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ NewOps[2] = Mask;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
}
+
+ if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger)
+ return PromoteIntOp_MSTORE(N, 3);
+ if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector)
+ return WidenVecOp_MSTORE(N, 3);
+ assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector);
+ return SplitVecOp_MSTORE(N, 3);
} else { // Data operand
assert(OpNo == 3 && "Unexpected operand for promotion");
DataOp = GetPromotedInteger(DataOp);
- Mask = PromoteTargetBoolean(Mask, DataOp.getValueType());
TruncateStore = true;
}
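
For reference, the operation whose mask is being legalized here: a masked store writes only the lanes whose mask bit is set and leaves the rest of memory untouched. A scalar sketch (illustrative, not part of the patch):

#include <cstddef>

template <typename T>
void maskedStore(T *Ptr, const T *Data, const bool *Mask, size_t NumElts) {
  for (size_t i = 0; i < NumElts; ++i)
    if (Mask[i])        // unselected lanes keep their old memory contents
      Ptr[i] = Data[i];
}
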
@@ -1250,6 +1273,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
// The Mask
EVT DataVT = N->getValueType(0);
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else if (OpNo == 4) {
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
@@ -1270,6 +1296,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
// The Mask
EVT DataVT = N->getValue().getValueType();
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
+ } else if (OpNo == 4) {
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
@@ -3224,8 +3253,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Hi = DAG.getTruncStore(
Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo);
@@ -3260,8 +3288,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
MMOFlags, AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
// Store the lowest ExcessBits bits in the second half.
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
N->getPointerInfo().getWithOffset(IncrementSize),
@@ -3462,7 +3489,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
- EVT InElemTy = OutVT.getVectorElementType();
EVT OutElemTy = NOutVT.getVectorElementType();
unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements();
@@ -3471,15 +3497,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
assert(NumElem * NumOperands == NumOutElem &&
"Unexpected number of elements");
+ // If the input type is legal and we can promote it to a legal type with the
+ // same element size, go ahead and do that to create a new concat.
+ if (getTypeAction(N->getOperand(0).getValueType()) ==
+ TargetLowering::TypeLegal) {
+ EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem);
+ if (TLI.isTypeLegal(InPromotedTy)) {
+ SmallVector<SDValue, 8> Ops(NumOperands);
+ for (unsigned i = 0; i < NumOperands; ++i) {
+ Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy,
+ N->getOperand(i));
+ }
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops);
+ }
+ }
+
// Take the elements from the first vector.
SmallVector<SDValue, 8> Ops(NumOutElem);
for (unsigned i = 0; i < NumOperands; ++i) {
SDValue Op = N->getOperand(i);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger)
+ Op = GetPromotedInteger(Op);
+ EVT SclrTy = Op.getValueType().getVectorElementType();
+ assert(NumElem == Op.getValueType().getVectorNumElements() &&
+ "Unexpected number of elements");
+
for (unsigned j = 0; j < NumElem; ++j) {
SDValue Ext = DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op,
+ ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op,
DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext);
+ Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy);
}
}
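
A scalar sketch of the promoted concat (illustrative, not part of the patch; ANY_EXTEND leaves the new high bits unspecified, so the zero extension implied below is one valid choice):

#include <cstdint>
#include <vector>

std::vector<uint16_t> concatPromoted(const std::vector<uint8_t> &A,
                                     const std::vector<uint8_t> &B) {
  std::vector<uint16_t> Out;
  Out.reserve(A.size() + B.size());
  for (uint8_t V : A) Out.push_back(V); // ANY_EXTEND each element of op 0
  for (uint8_t V : B) Out.push_back(V); // ANY_EXTEND each element of op 1
  return Out;
}
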
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 001eed9fb8f62..b60d7bca498ad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -14,7 +14,9 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "SDNodeDbgValue.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
@@ -222,15 +224,21 @@ bool DAGTypeLegalizer::run() {
assert(N->getNodeId() == ReadyToProcess &&
"Node should be ready if on worklist!");
- if (IgnoreNodeResults(N))
+ DEBUG(dbgs() << "Legalizing node: "; N->dump());
+ if (IgnoreNodeResults(N)) {
+ DEBUG(dbgs() << "Ignoring node results\n");
goto ScanOperands;
+ }
// Scan the values produced by the node, checking to see if any result
// types are illegal.
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) {
EVT ResultVT = N->getValueType(i);
+ DEBUG(dbgs() << "Analyzing result type: " <<
+ ResultVT.getEVTString() << "\n");
switch (getTypeAction(ResultVT)) {
case TargetLowering::TypeLegal:
+ DEBUG(dbgs() << "Legal result type\n");
break;
// The following calls must take care of *all* of the node's results,
// not just the illegal result they were passed (this includes results
@@ -287,9 +295,12 @@ ScanOperands:
if (IgnoreNodeResults(N->getOperand(i).getNode()))
continue;
- EVT OpVT = N->getOperand(i).getValueType();
+ const auto Op = N->getOperand(i);
+ DEBUG(dbgs() << "Analyzing operand: "; Op.dump());
+ EVT OpVT = Op.getValueType();
switch (getTypeAction(OpVT)) {
case TargetLowering::TypeLegal:
+ DEBUG(dbgs() << "Legal operand\n");
continue;
// The following calls must either replace all of the node's results
// using ReplaceValueWith, and return "false"; or update the node's
@@ -832,6 +843,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo,
AnalyzeNewValue(Lo);
AnalyzeNewValue(Hi);
+ // Transfer debug values. Don't invalidate the source debug value until it's
+ // been transferred to the high and low bits.
+ if (DAG.getDataLayout().isBigEndian()) {
+ DAG.transferDbgValues(Op, Hi, 0, Hi.getValueSizeInBits(), false);
+ DAG.transferDbgValues(Op, Lo, Hi.getValueSizeInBits(),
+ Lo.getValueSizeInBits());
+ } else {
+ DAG.transferDbgValues(Op, Lo, 0, Lo.getValueSizeInBits(), false);
+ DAG.transferDbgValues(Op, Hi, Lo.getValueSizeInBits(),
+ Hi.getValueSizeInBits());
+ }
+
// Remember that this is the result of the node.
std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op];
assert(!Entry.first.getNode() && "Node already expanded");
@@ -1002,8 +1025,13 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) {
// Update the widening map.
assert(Results.size() == N->getNumValues() &&
"Custom lowering returned the wrong number of results!");
- for (unsigned i = 0, e = Results.size(); i != e; ++i)
- SetWidenedVector(SDValue(N, i), Results[i]);
+ for (unsigned i = 0, e = Results.size(); i != e; ++i) {
+ // If this is a chain output just replace it.
+ if (Results[i].getValueType() == MVT::Other)
+ ReplaceValueWith(SDValue(N, i), Results[i]);
+ else
+ SetWidenedVector(SDValue(N, i), Results[i]);
+ }
return true;
}
@@ -1117,23 +1145,6 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) {
return DAG.getNode(ExtendCode, dl, BoolVT, Bool);
}
-/// Widen the given target boolean to a target boolean of the given type.
-/// The boolean vector is widened and then promoted to match the target boolean
-/// type of the given ValVT.
-SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT,
- bool WithZeroes) {
- SDLoc dl(Bool);
- EVT BoolVT = Bool.getValueType();
-
- assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() &&
- TLI.isTypeLegal(ValVT) &&
- "Unexpected types in WidenTargetBoolean");
- EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(),
- ValVT.getVectorNumElements());
- Bool = ModifyToType(Bool, WideVT, WithZeroes);
- return PromoteTargetBoolean(Bool, ValVT);
-}
-
/// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
EVT LoVT, EVT HiVT,
@@ -1142,9 +1153,14 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
Op.getValueSizeInBits() && "Invalid integer splitting!");
Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
+ unsigned ReqShiftAmountInBits =
+ Log2_32_Ceil(Op.getValueType().getSizeInBits());
+ MVT ShiftAmountTy =
+ TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType());
+ if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits())
+ ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits));
Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
- DAG.getConstant(LoVT.getSizeInBits(), dl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy));
Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
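
A plain-C++ sketch of the width computation above (illustrative; the loops mirror the semantics of Log2_32_Ceil and NextPowerOf2 from MathExtras.h):

// Bit width to use for the shift amount when splitting a value of OpBits
// bits: shift amounts go up to OpBits - 1 and need Log2_32_Ceil(OpBits)
// bits, so widen to the next power of two whenever the target's scalar
// shift-amount type is too narrow.
unsigned shiftAmountBits(unsigned OpBits, unsigned TargetShAmtBits) {
  unsigned Req = 0;
  while ((1ull << Req) < OpBits)
    ++Req;                            // Log2_32_Ceil(OpBits)
  if (Req <= TargetShAmtBits)
    return TargetShAmtBits;           // the target's type already suffices
  unsigned Pow2 = 1;
  while (Pow2 <= Req)
    Pow2 *= 2;                        // NextPowerOf2(Req), strictly greater
  return Pow2;
}
// For instance, splitting an i8192 value needs a 13-bit shift amount; on a
// target whose shift-amount type is i8 this picks a 16-bit type instead.
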
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index c46d1b04804c9..64cb80e0d8538 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -18,9 +18,9 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -89,7 +89,8 @@ private:
/// Pretend all of this node's results are legal.
bool IgnoreNodeResults(SDNode *N) const {
- return N->getOpcode() == ISD::TargetConstant;
+ return N->getOpcode() == ISD::TargetConstant ||
+ N->getOpcode() == ISD::Register;
}
/// For integer nodes that are below legal width, this map indicates what
@@ -182,10 +183,6 @@ private:
SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT);
- /// Modify Bit Vector to match SetCC result type of ValVT.
- /// The bit vector is widened with zeroes when WithZeroes is true.
- SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false);
-
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT,
@@ -400,18 +397,22 @@ private:
 /// Given an operand Op of floating-point type, returns its softened integer
 /// form if Op is not supported in target HW and was converted to an integer.
/// The integer contains exactly the same bits as Op - only the type changed.
- /// For example, if Op is an f32 which was softened to an i32, then this method
- /// returns an i32, the bits of which coincide with those of Op.
+ /// For example, if Op is an f32 which was softened to an i32, then this
+ /// method returns an i32, the bits of which coincide with those of Op.
/// If the Op can be efficiently supported in target HW or the operand must
/// stay in a register, the Op is not converted to an integer.
/// In that case, the given op is returned.
SDValue GetSoftenedFloat(SDValue Op) {
- SDValue &SoftenedOp = SoftenedFloats[Op];
- if (!SoftenedOp.getNode() &&
- isSimpleLegalType(Op.getValueType()))
+ auto Iter = SoftenedFloats.find(Op);
+ if (Iter == SoftenedFloats.end()) {
+ assert(isSimpleLegalType(Op.getValueType()) &&
+ "Operand wasn't converted to integer?");
return Op;
+ }
+
+ SDValue &SoftenedOp = Iter->second;
+ assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?");
RemapValue(SoftenedOp);
- assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?");
return SoftenedOp;
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
@@ -618,7 +619,6 @@ private:
SDValue ScalarizeVecRes_SETCC(SDNode *N);
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
- SDValue ScalarizeVecRes_VSETCC(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -727,7 +727,6 @@ private:
SDValue WidenVecRes_SETCC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N);
- SDValue WidenVecRes_VSETCC(SDNode* N);
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index f3306151d864b..993465ae9dc21 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -484,8 +484,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment,
St->getMemOperand()->getFlags(), AAInfo);
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Hi = DAG.getStore(Chain, dl, Hi, Ptr,
St->getPointerInfo().getWithOffset(IncrementSize),
MinAlign(Alignment, IncrementSize),
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 9355dbe77f94e..74970ab5792c0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1,4 +1,4 @@
-//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===//
+//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
//
// The LLVM Compiler Infrastructure
//
@@ -27,15 +27,34 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
namespace {
+
class VectorLegalizer {
SelectionDAG& DAG;
const TargetLowering &TLI;
- bool Changed; // Keep track of whether anything changed
+ bool Changed = false; // Keep track of whether anything changed
/// For nodes that are of legal width, and that have more than one use, this
/// map indicates what regularized operand to use. This allows us to avoid
@@ -128,12 +147,15 @@ class VectorLegalizer {
SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
public:
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
+
 /// \brief Begin legalizing the vector operations in the DAG.
bool Run();
- VectorLegalizer(SelectionDAG& dag) :
- DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
};
+} // end anonymous namespace
+
bool VectorLegalizer::Run() {
// Before we start legalizing vector nodes, check if there are any vectors.
bool HasVectors = false;
@@ -475,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
- EVT NewVT;
+ EVT NewVT = VT;
unsigned NewOpc;
- while (1) {
- NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ while (true) {
+ NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
NewOpc = ISD::FP_TO_SINT;
@@ -490,12 +512,19 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
}
}
- SDLoc loc(Op);
- SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
- return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
+ SDLoc dl(Op);
+ SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (e.g. because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext
+ : ISD::AssertSext,
+ dl, NewVT, Promoted,
+ DAG.getValueType(VT.getScalarType()));
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
}
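
An illustrative scalar sketch (not part of the patch) of the promoted conversion, using a hypothetical f32 to u16 lowered through i32; the AssertZext matches the comment's guarantee that out-of-range inputs were undefined to begin with:

#include <cstdint>

uint16_t fpToU16ViaI32(float In) {
  int32_t Wide = static_cast<int32_t>(In); // the widened FP_TO_SINT node
  // AssertZext i16: when the input fits in u16 the upper bits of Wide are
  // zero and the truncation below is lossless; anything else was undefined.
  return static_cast<uint16_t>(Wide);      // TRUNCATE to the original type
}
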
-
SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
@@ -503,7 +532,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
EVT SrcEltVT = SrcVT.getScalarType();
unsigned NumElem = SrcVT.getVectorNumElements();
-
SDValue NewChain;
SDValue Value;
if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) {
@@ -534,7 +562,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
unsigned Offset = 0;
unsigned RemainingBytes = SrcVT.getStoreSize();
SmallVector<SDValue, 8> LoadVals;
-
while (RemainingBytes > 0) {
SDValue ScalarLoad;
unsigned LoadBytes = WideBytes;
@@ -560,9 +587,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
RemainingBytes -= LoadBytes;
Offset += LoadBytes;
- BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR,
- DAG.getConstant(LoadBytes, dl,
- BasePTR.getValueType()));
+
+ BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes);
LoadVals.push_back(ScalarLoad.getValue(0));
LoadChains.push_back(ScalarLoad.getValue(1));
@@ -1117,8 +1143,6 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) {
return DAG.getBuildVector(VT, dl, Ops);
}
-}
-
bool SelectionDAG::LegalizeVectors() {
return VectorLegalizer(*this).Run();
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6aa3270883f08..8f2320f52a0f3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -243,7 +243,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
// For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
// are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
// legal and was not scalarized.
- // See the similar logic in ScalarizeVecRes_VSETCC
+ // See the similar logic in ScalarizeVecRes_SETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Op = GetScalarizedVector(Op);
} else {
@@ -307,7 +307,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
SDLoc DL(N);
// The vselect result and true/value operands needs scalarizing, but it's
// not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
- // See the similar logic in ScalarizeVecRes_VSETCC
+ // See the similar logic in ScalarizeVecRes_SETCC
if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
Cond = GetScalarizedVector(Cond);
} else {
@@ -380,21 +380,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
N->getOperand(4));
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
- assert(N->getValueType(0).isVector() ==
- N->getOperand(0).getValueType().isVector() &&
- "Scalar/Vector type mismatch");
-
- if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N);
-
- SDValue LHS = GetScalarizedVector(N->getOperand(0));
- SDValue RHS = GetScalarizedVector(N->getOperand(1));
- SDLoc DL(N);
-
- // Turn it into a scalar SETCC.
- return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2));
-}
-
SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
}
@@ -408,7 +393,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
return GetScalarizedVector(N->getOperand(Op));
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operand types must be vectors");
@@ -461,7 +446,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
N->dump(&DAG);
dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to scalarize this operator's operand!");
+ report_fatal_error("Do not know how to scalarize this operator's "
+ "operand!\n");
case ISD::BITCAST:
Res = ScalarizeVecOp_BITCAST(N);
break;
@@ -1068,34 +1054,57 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
if (CustomLowerNode(N, N->getValueType(0), true))
return;
- // Spill the vector to the stack.
+ // Make the vector elements byte-addressable if they aren't already.
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
+ if (VecVT.getScalarSizeInBits() < 8) {
+ EltVT = MVT::i8;
+ VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
+ VecVT.getVectorNumElements());
+ Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
+ // Extend the element type to match if needed.
+ if (EltVT.bitsGT(Elt.getValueType()))
+ Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);
+ }
+
+ // Spill the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType);
- Store =
- DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT);
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr,
+ MachinePointerInfo::getUnknownStack(MF), EltVT);
+
+ EVT LoVT, HiVT;
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
// Load the Lo part from the stack slot.
- Lo =
- DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
+ Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo);
// Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
+ unsigned IncrementSize = LoVT.getSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl,
StackPtr.getValueType()));
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ Hi = DAG.getLoad(HiVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize),
MinAlign(Alignment, IncrementSize));
+
+ // If we adjusted the original type, we need to truncate the results.
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+ if (LoVT != Lo.getValueType())
+ Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);
+ if (HiVT != Hi.getValueType())
+ Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
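
A standalone sketch (illustrative C++, not part of the patch) of the byte-addressable round-trip introduced above: sub-byte elements are widened to i8 so every lane gets its own stack address, and the Hi half is then loaded at an offset equal to the Lo half's store size.

    #include <cstdio>

    int main() {
      unsigned EltBits = 1, NumElts = 16;            // e.g. a v16i1 vector
      if (EltBits < 8)
        EltBits = 8;                                 // make each lane addressable
      unsigned LoElts = NumElts / 2;
      unsigned IncrementSize = LoElts * EltBits / 8; // offset of the Hi load
      std::printf("Hi half starts %u bytes into the slot\n", IncrementSize);
      return 0;
    }
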
@@ -1130,8 +1139,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT,
Alignment, MMOFlags, AAInfo);
@@ -1283,10 +1291,19 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDLoc DL(N);
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
- // Split the input.
+ // If the input also splits, handle it directly. Otherwise split it by hand.
SDValue LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+ if (getTypeAction(N->getOperand(0).getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(0), LL, LH);
+ else
+ std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+
+ if (getTypeAction(N->getOperand(1).getValueType()) ==
+ TargetLowering::TypeSplitVector)
+ GetSplitVector(N->getOperand(1), RL, RH);
+ else
+ std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
@@ -1753,30 +1770,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
// Make the vector elements byte-addressable if they aren't already.
SDLoc dl(N);
EVT EltVT = VecVT.getVectorElementType();
- if (EltVT.getSizeInBits() < 8) {
- SmallVector<SDValue, 4> ElementOps;
- for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) {
- ElementOps.push_back(DAG.getAnyExtOrTrunc(
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec,
- DAG.getConstant(i, dl, MVT::i8)),
- dl, MVT::i8));
- }
-
+ if (VecVT.getScalarSizeInBits() < 8) {
EltVT = MVT::i8;
VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
VecVT.getVectorNumElements());
- Vec = DAG.getBuildVector(VecVT, dl, ElementOps);
+ Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
}
// Store the vector to the stack.
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store =
- DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo());
+ auto &MF = DAG.getMachineFunction();
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo);
// Load back the required element.
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
- return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
- MachinePointerInfo(), EltVT);
+ return DAG.getExtLoad(
+ ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT);
}
SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
@@ -1886,9 +1898,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
- MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType());
- MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType());
-
// if Alignment is equal to the vector size,
// take the half of it for the second part
unsigned SecondHalfAlignment =
@@ -1955,7 +1964,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
else
std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
- SDValue Lo, Hi;
+ SDValue Lo;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
@@ -1970,13 +1979,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
Alignment, N->getAAInfo(), N->getRanges());
- SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi};
- Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
-
- // Build a factor node to remember that this store is independent of the
- // other one.
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
+  // The order of the scatter operations after splitting is well defined:
+  // the "Hi" part comes after the "Lo" part, so the two operations must be
+  // chained one after the other.
+ SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi};
+ return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
+ DL, OpsHi, MMO);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
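
Read together, the new lowering chains the two halves explicitly (a sketch assembled from the hunk above, using the variables already in scope there; the separate Hi memory operand is elided for brevity). The Lo scatter's chain result feeds the Hi scatter instead of a TokenFactor joining two independent stores:

    SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo};
    Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
                              DL, OpsLo, MMO);
    SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi};
    return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
                                DL, OpsHi, MMO);
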
@@ -2007,8 +2015,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
AAInfo);
// Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
if (isTruncating)
Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
@@ -2919,30 +2926,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
ISD::LoadExtType ExtType = N->getExtensionType();
SDLoc dl(N);
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- Mask = GetWidenedVector(Mask);
- else {
- EVT BoolVT = getSetCCResultType(WidenVT);
-
- // We can't use ModifyToType() because we should fill the mask with
- // zeroes
- unsigned WidenNumElts = BoolVT.getVectorNumElements();
- unsigned MaskNumElts = MaskVT.getVectorNumElements();
-
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
- Ops[0] = Mask;
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = ZeroVal;
-
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
- }
+  // The mask should be widened as well.
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
N->getMemOperand(), ExtType,
- N->isExpandingLoad());
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
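
A self-contained model (plain C++, not LLVM code) of the invariant that ModifyToType(Mask, WideMaskVT, /*FillWithZeroes=*/true) must preserve: lanes added by widening stay inactive, so the widened masked load cannot touch memory the original operation would not have.

    #include <cstdio>
    #include <vector>

    std::vector<bool> widenMask(const std::vector<bool> &M, unsigned N) {
      std::vector<bool> W(N, false);            // new lanes default to inactive
      for (unsigned i = 0; i < M.size(); ++i)
        W[i] = M[i];
      return W;
    }

    int main() {
      for (bool B : widenMask({true, false, true}, 8))
        std::printf("%d", B ? 1 : 0);           // prints 10100000
      std::printf("\n");
      return 0;
    }
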
@@ -2953,12 +2946,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
SDValue Src0 = GetWidenedVector(N->getValue());
unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
// The mask should be widened as well
- Mask = WidenTargetBoolean(Mask, WideVT, true);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
// Widen the Index operand
SDValue Index = N->getIndex();
@@ -3032,7 +3029,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
// Make a new Mask node, with a legal result VT.
SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
+ for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
Ops.push_back(InMask->getOperand(i));
SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
@@ -3065,12 +3062,9 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
} else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
EVT SubVT = Mask->getValueType(0);
- SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
- SubConcatOps[0] = Mask;
- for (unsigned i = 1; i < NumSubVecs; ++i)
- SubConcatOps[i] = DAG.getUNDEF(SubVT);
- Mask =
- DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
+ SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT));
+ SubOps[0] = Mask;
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
}
assert((Mask->getValueType(0) == ToMaskVT) &&
@@ -3105,7 +3099,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
// If this is a split VSELECT that was already handled, do
// nothing.
- if (Cond->getValueType(0).getScalarSizeInBits() != 1)
+ EVT CondVT = Cond->getValueType(0);
+ if (CondVT.getScalarSizeInBits() != 1)
return SDValue();
EVT VSelVT = N->getValueType(0);
@@ -3129,6 +3124,14 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
if (SetCCResVT.getScalarSizeInBits() == 1)
return SDValue();
+ } else if (CondVT.getScalarType() == MVT::i1) {
+ // If there is support for an i1 vector mask (or only scalar i1 conditions),
+ // don't touch.
+ while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
+ CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);
+
+ if (CondVT.getScalarType() == MVT::i1)
+ return SDValue();
}
// Get the VT and operands for VSELECT, and widen if needed.
@@ -3236,19 +3239,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
N->getOperand(1), InOp1, InOp2, N->getOperand(4));
}
-SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
- assert(N->getValueType(0).isVector() ==
- N->getOperand(0).getValueType().isVector() &&
- "Scalar/Vector type mismatch");
- if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N);
-
- EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
- SDValue InOp1 = GetWidenedVector(N->getOperand(0));
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
- return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT,
- InOp1, InOp2, N->getOperand(2));
-}
-
SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
return DAG.getUNDEF(WidenVT);
@@ -3279,7 +3269,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
}
-SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
assert(N->getValueType(0).isVector() &&
N->getOperand(0).getValueType().isVector() &&
"Operands must be vectors");
@@ -3556,6 +3546,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
}
SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
+ assert(OpNo == 3 && "Can widen only data operand of mstore");
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
EVT MaskVT = Mask.getValueType();
@@ -3564,25 +3555,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
SDValue WideVal = GetWidenedVector(StVal);
SDLoc dl(N);
- if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- Mask = GetWidenedVector(Mask);
- else {
- // The mask should be widened as well.
- EVT BoolVT = getSetCCResultType(WideVal.getValueType());
- // We can't use ModifyToType() because we should fill the mask with
- // zeroes.
- unsigned WidenNumElts = BoolVT.getVectorNumElements();
- unsigned MaskNumElts = MaskVT.getVectorNumElements();
-
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
- Ops[0] = Mask;
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = ZeroVal;
+ // The mask should be widened as well.
+ EVT WideVT = WideVal.getValueType();
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
- }
assert(Mask.getValueType().getVectorNumElements() ==
WideVal.getValueType().getVectorNumElements() &&
"Mask and data vectors should have the same number of elements");
@@ -3596,15 +3575,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue DataOp = MSC->getValue();
SDValue Mask = MSC->getMask();
+ EVT MaskVT = Mask.getValueType();
// Widen the value.
SDValue WideVal = GetWidenedVector(DataOp);
EVT WideVT = WideVal.getValueType();
- unsigned NumElts = WideVal.getValueType().getVectorNumElements();
+ unsigned NumElts = WideVT.getVectorNumElements();
SDLoc dl(N);
// The mask should be widened as well.
- Mask = WidenTargetBoolean(Mask, WideVT, true);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(), NumElts);
+ Mask = ModifyToType(Mask, WideMaskVT, true);
// Widen index.
SDValue Index = MSC->getIndex();
@@ -3806,8 +3788,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
while (LdWidth > 0) {
unsigned Increment = NewVTWidth / 8;
Offset += Increment;
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, dl, BasePtr.getValueType()));
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
SDValue L;
if (LdWidth < NewVTWidth) {
@@ -3839,7 +3820,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
}
LdOps.push_back(L);
-
+ LdOp = L;
LdWidth -= NewVTWidth;
}
@@ -3929,10 +3910,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr,
- DAG.getConstant(Offset, dl,
- BasePtr.getValueType()));
+ SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
Align, MMOFlags, AAInfo);
@@ -3987,9 +3965,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
StWidth -= NewVTWidth;
Offset += Increment;
Idx += NumVTElts;
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, dl,
- BasePtr.getValueType()));
+
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
} while (StWidth != 0 && StWidth >= NewVTWidth);
} else {
// Cast the vector to the scalar type we can store.
@@ -4008,9 +3985,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
MinAlign(Align, Offset), MMOFlags, AAInfo));
StWidth -= NewVTWidth;
Offset += Increment;
- BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
- DAG.getConstant(Increment, dl,
- BasePtr.getValueType()));
+ BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment);
} while (StWidth != 0 && StWidth >= NewVTWidth);
// Restore index back to be relative to the original widen element type.
Idx = Idx * NewVTWidth / ValEltWidth;
@@ -4053,10 +4028,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain,
MMOFlags, AAInfo));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
- SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
- BasePtr,
- DAG.getConstant(Offset, dl,
- BasePtr.getValueType()));
+ SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset);
SDValue EOp = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index a21b4c7332540..379f0dcef513d 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -22,12 +22,12 @@
#include "llvm/CodeGen/ResourcePriorityQueue.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index 237d541b4cb97..cf92907a8b5f9 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -20,32 +20,31 @@
namespace llvm {
-class MDNode;
+class DIVariable;
+class DIExpression;
class SDNode;
class Value;
-/// SDDbgValue - Holds the information from a dbg_value node through SDISel.
+/// Holds the information from a dbg_value node through SDISel.
/// We do not use SDValue here to avoid including its header.
-
class SDDbgValue {
public:
enum DbgValueKind {
- SDNODE = 0, // value is the result of an expression
- CONST = 1, // value is a constant
- FRAMEIX = 2 // value is contents of a stack location
+ SDNODE = 0, ///< Value is the result of an expression.
+ CONST = 1, ///< Value is a constant.
+ FRAMEIX = 2 ///< Value is contents of a stack location.
};
private:
union {
struct {
- SDNode *Node; // valid for expressions
- unsigned ResNo; // valid for expressions
+ SDNode *Node; ///< Valid for expressions.
+ unsigned ResNo; ///< Valid for expressions.
} s;
- const Value *Const; // valid for constants
- unsigned FrameIx; // valid for stack objects
+ const Value *Const; ///< Valid for constants.
+ unsigned FrameIx; ///< Valid for stack objects.
} u;
- MDNode *Var;
- MDNode *Expr;
- uint64_t Offset;
+ DIVariable *Var;
+ DIExpression *Expr;
DebugLoc DL;
unsigned Order;
enum DbgValueKind kind;
@@ -53,71 +52,65 @@ private:
bool Invalid = false;
public:
- // Constructor for non-constants.
- SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir,
- uint64_t off, DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
- IsIndirect(indir) {
+ /// Constructor for non-constants.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R,
+ bool indir, DebugLoc dl, unsigned O)
+ : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) {
kind = SDNODE;
u.s.Node = N;
u.s.ResNo = R;
}
- // Constructor for constants.
- SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off,
- DebugLoc dl, unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
- IsIndirect(false) {
+ /// Constructor for constants.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl,
+ unsigned O)
+ : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
kind = CONST;
u.Const = C;
}
- // Constructor for frame indices.
- SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl,
+ /// Constructor for frame indices.
+ SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl,
unsigned O)
- : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O),
- IsIndirect(false) {
+ : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) {
kind = FRAMEIX;
u.FrameIx = FI;
}
- // Returns the kind.
+ /// Returns the kind.
DbgValueKind getKind() const { return kind; }
- // Returns the MDNode pointer for the variable.
- MDNode *getVariable() const { return Var; }
+ /// Returns the DIVariable pointer for the variable.
+ DIVariable *getVariable() const { return Var; }
- // Returns the MDNode pointer for the expression.
- MDNode *getExpression() const { return Expr; }
+ /// Returns the DIExpression pointer for the expression.
+ DIExpression *getExpression() const { return Expr; }
- // Returns the SDNode* for a register ref
+ /// Returns the SDNode* for a register ref
SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; }
- // Returns the ResNo for a register ref
+ /// Returns the ResNo for a register ref
unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; }
- // Returns the Value* for a constant
+ /// Returns the Value* for a constant
const Value *getConst() const { assert (kind==CONST); return u.Const; }
- // Returns the FrameIx for a stack object
+ /// Returns the FrameIx for a stack object
unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; }
- // Returns whether this is an indirect value.
+ /// Returns whether this is an indirect value.
bool isIndirect() const { return IsIndirect; }
- // Returns the offset.
- uint64_t getOffset() const { return Offset; }
-
- // Returns the DebugLoc.
+ /// Returns the DebugLoc.
DebugLoc getDebugLoc() const { return DL; }
- // Returns the SDNodeOrder. This is the order of the preceding node in the
- // input.
+ /// Returns the SDNodeOrder. This is the order of the preceding node in the
+ /// input.
unsigned getOrder() const { return Order; }
- // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
- // property. A SDDbgValue is invalid if the SDNode that produces the value is
- // deleted.
+ /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+  /// property. An SDDbgValue is invalid if the SDNode that produces the value is
+ /// deleted.
void setIsInvalidated() { Invalid = true; }
bool isInvalidated() const { return Invalid; }
};
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 1379940932772..698e14453d1d7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -18,13 +18,13 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 70b1fa77a0991..49f304c8cc869 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1,4 +1,4 @@
-//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,23 +16,47 @@
//===----------------------------------------------------------------------===//
#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -46,6 +70,7 @@ static RegisterScheduler
burrListDAGScheduler("list-burr",
"Bottom-up register reduction list scheduling",
createBURRListDAGScheduler);
+
static RegisterScheduler
sourceListDAGScheduler("source",
"Similar to list-burr but schedules in source "
@@ -105,6 +130,7 @@ static cl::opt<unsigned> AvgIPC(
cl::desc("Average inst/cycle whan no target itinerary exists."));
namespace {
+
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports both top-down and bottom-up scheduling.
@@ -112,7 +138,6 @@ namespace {
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
/// NeedLatency - True if the scheduler will make use of latency information.
- ///
bool NeedLatency;
/// AvailableQueue - The priority queue to use for the available SUnits.
@@ -122,13 +147,13 @@ private:
/// been issued, but their results are not ready yet (due to the latency of
/// the operation). Once the operands becomes available, the instruction is
/// added to the AvailableQueue.
- std::vector<SUnit*> PendingQueue;
+ std::vector<SUnit *> PendingQueue;
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
/// CurCycle - The current scheduler state corresponds to this cycle.
- unsigned CurCycle;
+ unsigned CurCycle = 0;
/// MinAvailableCycle - Cycle of the soonest available instruction.
unsigned MinAvailableCycle;
@@ -147,7 +172,9 @@ private:
// Collect interferences between physical register use/defs.
// Each interference is an SUnit and set of physical registers.
SmallVector<SUnit*, 4> Interferences;
- typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
+
+ using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>;
+
LRegsMapT LRegsMap;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
@@ -163,9 +190,8 @@ public:
SchedulingPriorityQueue *availqueue,
CodeGenOpt::Level OptLevel)
: ScheduleDAGSDNodes(mf),
- NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ NeedLatency(needlatency), AvailableQueue(availqueue),
Topo(SUnits, nullptr) {
-
const TargetSubtargetInfo &STI = mf.getSubtarget();
if (DisableSchedCycles || !NeedLatency)
HazardRec = new ScheduleHazardRecognizer();
@@ -267,6 +293,7 @@ private:
return !NeedLatency;
}
};
+
} // end anonymous namespace
/// GetCostForDef - Looks up the register class and cost for a given definition.
@@ -319,13 +346,13 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
- DEBUG(dbgs()
- << "********** List Scheduling BB#" << BB->getNumber()
- << " '" << BB->getName() << "' **********\n");
+ DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
CurCycle = 0;
IssueCount = 0;
- MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
+ MinAvailableCycle =
+ DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max();
NumLiveRegs = 0;
// Allocate slots for each physical register, plus one for a special register
// to track the virtual resource of a calling sequence.
@@ -409,7 +436,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
unsigned NestLevel,
const TargetInstrInfo *TII) {
SDNode *N = Outer;
- for (;;) {
+ while (true) {
if (N == Inner)
return true;
// For a TokenFactor, examine each operand. There may be multiple ways
@@ -456,7 +483,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
const TargetInstrInfo *TII) {
- for (;;) {
+ while (true) {
// For a TokenFactor, examine each operand. There may be multiple ways
// to get to the CALLSEQ_BEGIN, but we need to find the path with the
// most nesting in order to ensure that we find the corresponding match.
@@ -550,6 +577,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
unsigned NestLevel = 0;
unsigned MaxNest = 0;
SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);
+ assert(N && "Must find call sequence start");
SUnit *Def = &SUnits[N->getNodeId()];
CallSeqEndForStart[Def] = SU;
@@ -571,7 +599,7 @@ void ScheduleDAGRRList::ReleasePending() {
// If the available queue is empty, it is safe to reset MinAvailableCycle.
if (AvailableQueue->empty())
- MinAvailableCycle = UINT_MAX;
+ MinAvailableCycle = std::numeric_limits<unsigned>::max();
// Check to see if any of the pending instructions are ready to issue. If
// so, add them to the available queue.
@@ -791,7 +819,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
AvailableQueue->remove(PredSU);
}
- assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+ assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() &&
+ "NumSuccsLeft will overflow!");
++PredSU->NumSuccsLeft;
}
@@ -821,9 +850,13 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
SUNode = SUNode->getGluedNode()) {
if (SUNode->isMachineOpcode() &&
SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) {
+ SUnit *SeqEnd = CallSeqEndForStart[SU];
+ assert(SeqEnd && "Call sequence start/end must be known");
+ assert(!LiveRegDefs[CallResource]);
+ assert(!LiveRegGens[CallResource]);
++NumLiveRegs;
LiveRegDefs[CallResource] = SU;
- LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+ LiveRegGens[CallResource] = SeqEnd;
}
}
@@ -835,6 +868,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
if (SUNode->isMachineOpcode() &&
SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ assert(LiveRegDefs[CallResource]);
+ assert(LiveRegGens[CallResource]);
--NumLiveRegs;
LiveRegDefs[CallResource] = nullptr;
LiveRegGens[CallResource] = nullptr;
@@ -891,7 +926,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
if (LookAhead == 0)
return;
- std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead);
unsigned HazardCycle = (*I)->getHeight();
for (auto E = Sequence.end(); I != E; ++I) {
SUnit *SU = *I;
@@ -1319,8 +1354,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// If we're in the middle of scheduling a call, don't begin scheduling
// another call. Also, don't allow any physical registers to be live across
// the call.
- if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) ||
- (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+ if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) {
// Check the special calling-sequence resource.
unsigned CallResource = TRI->getNumRegs();
if (LiveRegDefs[CallResource]) {
@@ -1390,27 +1424,32 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
- while (CurSU) {
- SmallVector<unsigned, 4> LRegs;
- if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
- break;
- DEBUG(dbgs() << " Interfering reg " <<
- (LRegs[0] == TRI->getNumRegs() ? "CallResource"
- : TRI->getName(LRegs[0]))
- << " SU #" << CurSU->NodeNum << '\n');
- std::pair<LRegsMapT::iterator, bool> LRegsPair =
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
- if (LRegsPair.second) {
- CurSU->isPending = true; // This SU is not in AvailableQueue right now.
- Interferences.push_back(CurSU);
- }
- else {
- assert(CurSU->isPending && "Interferences are pending");
- // Update the interference with current live regs.
- LRegsPair.first->second = LRegs;
+ auto FindAvailableNode = [&]() {
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ DEBUG(dbgs() << " Interfering reg ";
+ if (LRegs[0] == TRI->getNumRegs())
+ dbgs() << "CallResource";
+ else
+ dbgs() << printReg(LRegs[0], TRI);
+ dbgs() << " SU #" << CurSU->NodeNum << '\n');
+ std::pair<LRegsMapT::iterator, bool> LRegsPair =
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ if (LRegsPair.second) {
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ }
+ else {
+ assert(CurSU->isPending && "Interferences are pending");
+ // Update the interference with current live regs.
+ LRegsPair.first->second = LRegs;
+ }
+ CurSU = AvailableQueue->pop();
}
- CurSU = AvailableQueue->pop();
- }
+ };
+ FindAvailableNode();
if (CurSU)
return CurSU;
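
The refactor above hoists the scan loop into a FindAvailableNode lambda so the identical search can be re-run after backtracking mutates the queue. A minimal standalone model of the pattern (names illustrative only):

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<int> Queue = {3, -1, 4, -2};
      int Cur = Queue.back();
      Queue.pop_back();
      auto FindAvailable = [&] {
        while (Cur < 0 && !Queue.empty()) {     // skip "interfering" entries
          Cur = Queue.back();
          Queue.pop_back();
        }
      };
      FindAvailable();                          // first scan
      // ... backtracking would mutate state here ...
      FindAvailable();                          // the same logic runs again
      std::printf("picked %d\n", Cur);          // prints: picked 4
      return 0;
    }
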
@@ -1423,7 +1462,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// Try unscheduling up to the point where it's safe to schedule
// this node.
SUnit *BtSU = nullptr;
- unsigned LiveCycle = UINT_MAX;
+ unsigned LiveCycle = std::numeric_limits<unsigned>::max();
for (unsigned Reg : LRegs) {
if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
BtSU = LiveRegGens[Reg];
@@ -1447,13 +1486,16 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
// If one or more successors has been unscheduled, then the current
// node is no longer available.
- if (!TrySU->isAvailable || !TrySU->NodeQueueId)
+ if (!TrySU->isAvailable || !TrySU->NodeQueueId) {
+ DEBUG(dbgs() << "TrySU not available; choosing node from queue\n");
CurSU = AvailableQueue->pop();
- else {
+ } else {
+ DEBUG(dbgs() << "TrySU available\n");
// Available and in AvailableQueue
AvailableQueue->remove(TrySU);
CurSU = TrySU;
}
+ FindAvailableNode();
// Interferences has been mutated. We must break.
break;
}
@@ -1540,7 +1582,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
while (AvailableQueue->empty() && !PendingQueue.empty()) {
// Advance the cycle to free resources. Skip ahead to the next ready SU.
- assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() &&
+ "MinAvailableCycle uninitialized");
AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
}
}
@@ -1553,17 +1596,11 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
#endif
}
-//===----------------------------------------------------------------------===//
-// RegReductionPriorityQueue Definition
-//===----------------------------------------------------------------------===//
-//
-// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
-// to reduce register pressure.
-//
namespace {
+
class RegReductionPQBase;
-struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+struct queue_sort {
bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};
@@ -1571,6 +1608,7 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
template<class SF>
struct reverse_sort : public queue_sort {
SF &SortFunc;
+
reverse_sort(SF &sf) : SortFunc(sf) {}
bool operator()(SUnit* left, SUnit* right) const {
@@ -1590,6 +1628,7 @@ struct bu_ls_rr_sort : public queue_sort {
};
RegReductionPQBase *SPQ;
+
bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
bool operator()(SUnit* left, SUnit* right) const;
@@ -1603,8 +1642,8 @@ struct src_ls_rr_sort : public queue_sort {
};
RegReductionPQBase *SPQ;
- src_ls_rr_sort(RegReductionPQBase *spq)
- : SPQ(spq) {}
+
+ src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
bool operator()(SUnit* left, SUnit* right) const;
};
@@ -1617,8 +1656,8 @@ struct hybrid_ls_rr_sort : public queue_sort {
};
RegReductionPQBase *SPQ;
- hybrid_ls_rr_sort(RegReductionPQBase *spq)
- : SPQ(spq) {}
+
+ hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
bool isReady(SUnit *SU, unsigned CurCycle) const;
@@ -1634,8 +1673,8 @@ struct ilp_ls_rr_sort : public queue_sort {
};
RegReductionPQBase *SPQ;
- ilp_ls_rr_sort(RegReductionPQBase *spq)
- : SPQ(spq) {}
+
+ ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
bool isReady(SUnit *SU, unsigned CurCycle) const;
@@ -1644,8 +1683,8 @@ struct ilp_ls_rr_sort : public queue_sort {
class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
- std::vector<SUnit*> Queue;
- unsigned CurQueueId;
+ std::vector<SUnit *> Queue;
+ unsigned CurQueueId = 0;
bool TracksRegPressure;
bool SrcOrder;
@@ -1656,13 +1695,12 @@ protected:
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const TargetLowering *TLI;
- ScheduleDAGRRList *scheduleDAG;
+ ScheduleDAGRRList *scheduleDAG = nullptr;
// SethiUllmanNumbers - The SethiUllman number for each node.
std::vector<unsigned> SethiUllmanNumbers;
/// RegPressure - Tracking current reg pressure per register class.
- ///
std::vector<unsigned> RegPressure;
/// RegLimit - Tracking the number of allocatable registers per register
@@ -1677,9 +1715,8 @@ public:
const TargetInstrInfo *tii,
const TargetRegisterInfo *tri,
const TargetLowering *tli)
- : SchedulingPriorityQueue(hasReadyFilter),
- CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
- MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
+ : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp),
+ SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) {
if (TracksRegPressure) {
unsigned NumRC = TRI->getNumRegClasses();
RegLimit.resize(NumRC);
@@ -1730,7 +1767,7 @@ public:
void remove(SUnit *SU) override {
assert(!Queue.empty() && "Queue is empty!");
assert(SU->NodeQueueId != 0 && "Not in queue!");
- std::vector<SUnit *>::iterator I = find(Queue, SU);
+ std::vector<SUnit *>::iterator I = llvm::find(Queue, SU);
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
@@ -1759,7 +1796,7 @@ protected:
};
template<class SF>
-static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) {
std::vector<SUnit *>::iterator Best = Q.begin();
for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I)
if (Picker(*Best, *I))
@@ -1772,7 +1809,7 @@ static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
}
template<class SF>
-SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
if (DAG->StressSched) {
reverse_sort<SF> RPicker(Picker);
@@ -1783,6 +1820,13 @@ SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
return popFromQueueImpl(Q, Picker);
}
+//===----------------------------------------------------------------------===//
+// RegReductionPriorityQueue Definition
+//===----------------------------------------------------------------------===//
+//
+// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
+// to reduce register pressure.
+//
template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
SF Picker;
@@ -1815,7 +1859,7 @@ public:
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override {
// Emulate pop() without clobbering NodeQueueIds.
- std::vector<SUnit*> DumpQueue = Queue;
+ std::vector<SUnit *> DumpQueue = Queue;
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
@@ -1826,17 +1870,11 @@ public:
#endif
};
-typedef RegReductionPriorityQueue<bu_ls_rr_sort>
-BURegReductionPriorityQueue;
-
-typedef RegReductionPriorityQueue<src_ls_rr_sort>
-SrcRegReductionPriorityQueue;
+using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>;
+using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>;
+using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>;
+using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>;
-typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
-HybridBURRPriorityQueue;
-
-typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
-ILPBURRPriorityQueue;
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -2855,7 +2893,6 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
/// This results in the store being scheduled immediately
/// after N, which shortens the U->N live range, reducing
/// register pressure.
-///
void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Visit all the nodes in topological order, working top-down.
for (SUnit &SU : *SUnits) {
@@ -3022,7 +3059,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3036,7 +3073,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
return SD;
}
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3050,7 +3087,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
return SD;
}
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
@@ -3066,7 +3103,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
return SD;
}
-llvm::ScheduleDAGSDNodes *
+ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
CodeGenOpt::Level OptLevel) {
const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 3c8526ebb7029..c09b47af26a66 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -23,14 +23,14 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
@@ -709,18 +709,17 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
// source order number as N.
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
- ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
- for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
- if (DVs[i]->isInvalidated())
+ for (auto DV : DAG->GetDbgValues(N)) {
+ if (DV->isInvalidated())
continue;
- unsigned DVOrder = DVs[i]->getOrder();
+ unsigned DVOrder = DV->getOrder();
if (!Order || DVOrder == Order) {
- MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
if (DbgMI) {
- Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ Orders.push_back({DVOrder, DbgMI});
BB->insert(InsertPos, DbgMI);
}
- DVs[i]->setIsInvalidated();
+ DV->setIsInvalidated();
}
}
}
@@ -742,16 +741,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
}
MachineBasicBlock *BB = Emitter.getBlock();
- if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
+ auto IP = Emitter.getInsertPos();
+ if (IP == BB->begin() || BB->back().isPHI() ||
// Fast-isel may have inserted some instructions, in which case the
// BB->back().isPHI() test will not fire when we want it to.
- std::prev(Emitter.getInsertPos())->isPHI()) {
+ std::prev(IP)->isPHI()) {
// Did not insert any instruction.
- Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr));
+ Orders.push_back({Order, (MachineInstr *)nullptr});
return;
}
- Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos())));
+ Orders.push_back({Order, &*std::prev(IP)});
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
@@ -856,8 +856,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
// Sort the source order instructions and use the order to insert debug
- // values.
- std::sort(Orders.begin(), Orders.end(), less_first());
+ // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
+  // regardless of the host's implementation of std::sort.
+ std::stable_sort(Orders.begin(), Orders.end(), less_first());
+ std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
+ [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
+ return LHS->getOrder() < RHS->getOrder();
+ });
SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
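
A self-contained illustration of why std::stable_sort matters here: entries that compare equal (same source order) keep their relative order, so DBG_VALUE emission is deterministic across hosts.

    #include <algorithm>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<int, char>> Orders = {{1, 'a'}, {0, 'x'}, {1, 'b'}};
      std::stable_sort(Orders.begin(), Orders.end(),
                       [](const std::pair<int, char> &L,
                          const std::pair<int, char> &R) {
                         return L.first < R.first;
                       });
      for (const auto &P : Orders)
        std::printf("%d%c ", P.first, P.second); // prints: 0x 1a 1b
      std::printf("\n");                         // 'a' still precedes 'b'
      return 0;
    }
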
@@ -869,10 +874,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
- for (; DI != DE &&
- (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+ for (; DI != DE; ++DI) {
+ if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
+ break;
if ((*DI)->isInvalidated())
continue;
+
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
if (DbgMI) {
if (!LastOrder)
@@ -891,11 +898,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Add trailing DbgValue's before the terminator. FIXME: May want to add
// some of them before one or more conditional branches?
SmallVector<MachineInstr*, 8> DbgMIs;
- while (DI != DE) {
- if (!(*DI)->isInvalidated())
- if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
- DbgMIs.push_back(DbgMI);
- ++DI;
+ for (; DI != DE; ++DI) {
+ if ((*DI)->isInvalidated())
+ continue;
+ assert((*DI)->getOrder() >= LastOrder &&
+ "emitting DBG_VALUE out of order");
+ if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap))
+ DbgMIs.push_back(DbgMI);
}
MachineBasicBlock *InsertBB = Emitter.getBlock();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 631cb34717c4f..07b46b9183ab7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -25,13 +25,13 @@
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;
@@ -93,9 +93,8 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGVLIW::Schedule() {
- DEBUG(dbgs()
- << "********** List Scheduling BB#" << BB->getNumber()
- << " '" << BB->getName() << "' **********\n");
+ DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB)
+ << " '" << BB->getName() << "' **********\n");
// Build the scheduling graph.
BuildSchedGraph(AA);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 16f425dc7969a..12a21e74079ec 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -37,6 +37,9 @@
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
@@ -59,11 +62,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
@@ -87,6 +87,15 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+#define DEBUG_TYPE "selectiondag"
+
+static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) {
+ DEBUG(
+ dbgs() << Msg;
+ V.getNode()->dump(G);
+ );
+}
+
//===----------------------------------------------------------------------===//
// ConstantFPSDNode Class
//===----------------------------------------------------------------------===//
@@ -116,8 +125,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
// ISD Namespace
//===----------------------------------------------------------------------===//
-bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
- bool AllowShrink) {
+bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) {
auto *BV = dyn_cast<BuildVectorSDNode>(N);
if (!BV)
return false;
@@ -126,10 +134,9 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal,
unsigned SplatBitSize;
bool HasUndefs;
unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
- unsigned MinSplatBits = AllowShrink ? 0 : EltSize;
return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs,
- MinSplatBits) &&
- EltSize >= SplatBitSize;
+ EltSize) &&
+ EltSize == SplatBitSize;
}
// FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be
@@ -895,12 +902,14 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
}
void SelectionDAG::init(MachineFunction &NewMF,
- OptimizationRemarkEmitter &NewORE) {
+ OptimizationRemarkEmitter &NewORE,
+ Pass *PassPtr) {
MF = &NewMF;
+ SDAGISelPass = PassPtr;
ORE = &NewORE;
TLI = getSubtarget().getTargetLowering();
TSI = getSubtarget().getSelectionDAGInfo();
- Context = &MF->getFunction()->getContext();
+ Context = &MF->getFunction().getContext();
}
SelectionDAG::~SelectionDAG() {
@@ -1018,7 +1027,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
assert(!VT.isVector() &&
"getZeroExtendInReg should use the vector element type instead of "
"the vector type!");
- if (Op.getValueType() == VT) return Op;
+ if (Op.getValueType().getScalarType() == VT) return Op;
unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Imm = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
@@ -1156,7 +1165,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
- return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+
+ SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
+ return V;
}
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
@@ -1176,11 +1187,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT);
CSEMap.InsertNode(N, IP);
InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this);
}
SDValue Result(N, 0);
if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
+
return Result;
}
@@ -1222,6 +1235,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL,
SDValue Result(N, 0);
if (VT.isVector())
Result = getSplatBuildVector(VT, DL, Result);
+ NewSDValueDbgMsg(Result, "Creating fp constant: ", this);
return Result;
}
@@ -1317,7 +1331,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = MF->getFunction()->optForSize()
+ Alignment = MF->getFunction().optForSize()
? getDataLayout().getABITypeAlignment(C->getType())
: getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1471,7 +1485,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
int NElts = Mask.size();
- assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
+ assert(llvm::all_of(Mask,
+ [&](int M) { return M < (NElts * 2) && M >= -1; }) &&
"Index out of range");
// Copy the mask so we can do any needed cleanup.
@@ -1622,7 +1637,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) {
@@ -1665,15 +1682,20 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) {
SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root,
MCSymbol *Label) {
+ return getLabelNode(ISD::EH_LABEL, dl, Root, Label);
+}
+
+SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
+ SDValue Root, MCSymbol *Label) {
FoldingSetNodeID ID;
SDValue Ops[] = { Root };
- AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops);
+ AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops);
ID.AddPointer(Label);
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
+ auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -1955,6 +1977,69 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
return SDValue();
}
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by Mask are used.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
+ switch (V.getOpcode()) {
+ default:
+ break;
+ case ISD::Constant: {
+ const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ assert(CV && "Const value should be ConstSDNode.");
+ const APInt &CVal = CV->getAPIntValue();
+ APInt NewVal = CVal & Mask;
+ if (NewVal != CVal)
+ return getConstant(NewVal, SDLoc(V), V.getValueType());
+ break;
+ }
+ case ISD::OR:
+ case ISD::XOR:
+ // If the LHS or RHS don't contribute bits to the or, drop them.
+ if (MaskedValueIsZero(V.getOperand(0), Mask))
+ return V.getOperand(1);
+ if (MaskedValueIsZero(V.getOperand(1), Mask))
+ return V.getOperand(0);
+ break;
+ case ISD::SRL:
+ // Only look at single-use SRLs.
+ if (!V.getNode()->hasOneUse())
+ break;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ // See if we can recursively simplify the LHS.
+ unsigned Amt = RHSC->getZExtValue();
+
+ // Watch out for shift count overflow though.
+ if (Amt >= Mask.getBitWidth())
+ break;
+ APInt NewMask = Mask << Amt;
+ if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
+ return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
+ V.getOperand(1));
+ }
+ break;
+ case ISD::AND: {
+ // X & -1 -> X (ignoring bits which aren't demanded).
+ ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
+ if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue()))
+ return V.getOperand(0);
+ break;
+ }
+ case ISD::ANY_EXTEND: {
+ SDValue Src = V.getOperand(0);
+ unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+ // Being conservative here - only peek through if we only demand bits in the
+ // non-extended source (even though the extended bits are technically undef).
+ if (Mask.getActiveBits() > SrcBitWidth)
+ break;
+ APInt SrcMask = Mask.trunc(SrcBitWidth);
+ if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask))
+ return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
+ break;
+ }
+ }
+ return SDValue();
+}
+
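// Illustrative sketch (not authoritative; 'DAG' and 'Val' are assumed from a
// caller's context): a combine that only keeps the low 8 bits can ask for a
// simpler operand before, e.g., narrowing a truncating store.
APInt DemandedMask = APInt::getLowBitsSet(Val.getValueSizeInBits(), 8);
if (SDValue Simplified = DAG.GetDemandedBits(Val, DemandedMask))
  Val = Simplified; // e.g. (or x, 0xFF00) collapses to x under this mask.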
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
@@ -1972,6 +2057,30 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
return Mask.isSubsetOf(Known.Zero);
}
+/// Helper function that checks to see if a node is a constant or a
+/// build vector of splat constants at least within the demanded elts.
+static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N,
+ const APInt &DemandedElts) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
+ return nullptr;
+ EVT VT = N.getValueType();
+ ConstantSDNode *Cst = nullptr;
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size");
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i));
+ if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) ||
+ C->getValueType(0) != VT.getScalarType())
+ return nullptr;
+ Cst = C;
+ }
+ return Cst;
+}
+
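// Illustrative sketch of the demanded-elts relaxation (values assumed): only
// the demanded lanes have to agree, so a partially non-uniform BUILD_VECTOR
// can still report a splat constant.
//   N            = BUILD_VECTOR <7, 3, 7, 9>
//   DemandedElts = 0x5              // lanes 0 and 2
// Both demanded lanes are 7, so isConstOrDemandedConstSplat returns that
// ConstantSDNode even though the undemanded lanes differ.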
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
static const APInt *getValidShiftAmountConstant(SDValue V) {
@@ -2005,6 +2114,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned BitWidth = Op.getScalarValueSizeInBits();
Known = KnownBits(BitWidth); // Don't know anything.
+
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ // We know all of the bits for a constant!
+ Known.One = C->getAPIntValue();
+ Known.Zero = ~Known.One;
+ return;
+ }
+ if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) {
+ // We know all of the bits for a constant fp!
+ Known.One = C->getValueAPF().bitcastToAPInt();
+ Known.Zero = ~Known.One;
+ return;
+ }
+
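// Illustrative sketch: hoisting the two constant cases means a floating-point
// constant now yields exact known bits as well, e.g. for float 1.0:
APFloat F(1.0f);
APInt Bits = F.bitcastToAPInt(); // 0x3F800000 - every bit is known
// Known.One = Bits; Known.Zero = ~Bits; exactly as the code above does.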
if (Depth == 6)
return; // Limit search depth.
@@ -2016,11 +2139,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
unsigned Opcode = Op.getOpcode();
switch (Opcode) {
- case ISD::Constant:
- // We know all of the bits for a constant!
- Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
- Known.Zero = ~Known.One;
- break;
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every demanded vector element.
assert(NumElts == Op.getValueType().getVectorNumElements() &&
@@ -2045,7 +2163,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero &= Known2.Zero;
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
}
break;
@@ -2083,7 +2201,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
if (!!DemandedRHS) {
SDValue RHS = Op.getOperand(1);
@@ -2109,11 +2227,45 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.Zero &= Known2.Zero;
}
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
}
break;
}
+ case ISD::INSERT_SUBVECTOR: {
+ // If we know the element index, demand any elements from the subvector and
+    // the remainder from the src it's inserted into, otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ if (!!DemandedSubElts) {
+ computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+ APInt DemandedSrcElts = DemandedElts & ~SubMask;
+ if (!!DemandedSrcElts) {
+ computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ } else {
+ computeKnownBits(Sub, Known, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ computeKnownBits(Src, Known2, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ break;
+ }
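// Worked example (values assumed): insert_subvector(v8i32 Src, v2i32 Sub,
// Idx=2) with every lane demanded. Lanes 2-3 come from Sub and the rest from
// Src, so the masks computed above are:
//   SubMask         = APInt::getBitsSet(8, 2, 4);   // lanes 2 and 3
//   DemandedSubElts = DemandedElts.extractBits(2, 2);
//   DemandedSrcElts = DemandedElts & ~SubMask;      // lanes 0-1 and 4-7
// Known is then the intersection of the bits known for both halves.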
case ISD::EXTRACT_SUBVECTOR: {
// If we know the element index, just demand that subvector elements,
// otherwise demand them all.
@@ -2132,10 +2284,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::BITCAST: {
SDValue N0 = Op.getOperand(0);
- unsigned SubBitWidth = N0.getScalarValueSizeInBits();
+ EVT SubVT = N0.getValueType();
+ unsigned SubBitWidth = SubVT.getScalarSizeInBits();
- // Ignore bitcasts from floating point.
- if (!N0.getValueType().isInteger())
+ // Ignore bitcasts from unsupported types.
+ if (!(SubVT.isInteger() || SubVT.isFloatingPoint()))
break;
// Fast handling of 'identity' bitcasts.
@@ -2193,7 +2346,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
}
}
@@ -2264,22 +2417,23 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
case ISD::SELECT:
- computeKnownBits(Op.getOperand(2), Known, Depth+1);
+ case ISD::VSELECT:
+ computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1);
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(1), Known2, Depth+1);
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
break;
case ISD::SELECT_CC:
- computeKnownBits(Op.getOperand(3), Known, Depth+1);
+ computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1);
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
- computeKnownBits(Op.getOperand(2), Known2, Depth+1);
+ computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1);
// Only known if known in both the LHS and RHS.
Known.One &= Known2.One;
@@ -2308,35 +2462,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
case ISD::SHL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- Known.Zero <<= *ShAmt;
- Known.One <<= *ShAmt;
+ unsigned Shift = ShAmt->getZExtValue();
+ Known.Zero <<= Shift;
+ Known.One <<= Shift;
// Low bits are known zero.
- Known.Zero.setLowBits(ShAmt->getZExtValue());
+ Known.Zero.setLowBits(Shift);
}
break;
case ISD::SRL:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- Known.Zero.lshrInPlace(*ShAmt);
- Known.One.lshrInPlace(*ShAmt);
+ unsigned Shift = ShAmt->getZExtValue();
+ Known.Zero.lshrInPlace(Shift);
+ Known.One.lshrInPlace(Shift);
// High bits are known zero.
- Known.Zero.setHighBits(ShAmt->getZExtValue());
+ Known.Zero.setHighBits(Shift);
+ } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) {
+ // If the shift amount is a vector of constants see if we can bound
+ // the number of upper zero bits.
+ unsigned ShiftAmountMin = BitWidth;
+ for (unsigned i = 0; i != BV->getNumOperands(); ++i) {
+ if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) {
+ const APInt &ShAmt = C->getAPIntValue();
+ if (ShAmt.ult(BitWidth)) {
+ ShiftAmountMin = std::min<unsigned>(ShiftAmountMin,
+ ShAmt.getZExtValue());
+ continue;
+ }
+ }
+ // Don't know anything.
+ ShiftAmountMin = 0;
+ break;
+ }
+
+ Known.Zero.setHighBits(ShiftAmountMin);
}
break;
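// Illustrative sketch of the new bound: for a constant shift-amount vector
// like <4, 7, 5> every lane shifts right by at least four bits, so at least
// the top four bits of each lane are known zero whatever the input was.
unsigned ShiftAmountMin = std::min({4u, 7u, 5u}); // == 4
KnownBits Example(32);
Example.Zero.setHighBits(ShiftAmountMin); // top 4 bits known zero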
case ISD::SRA:
if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
- Known.Zero.lshrInPlace(*ShAmt);
- Known.One.lshrInPlace(*ShAmt);
- // If we know the value of the sign bit, then we know it is copied across
- // the high bits by the shift amount.
- APInt SignMask = APInt::getSignMask(BitWidth);
- SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask.
- if (Known.Zero.intersects(SignMask)) {
- Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero.
- } else if (Known.One.intersects(SignMask)) {
- Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one.
- }
+ unsigned Shift = ShAmt->getZExtValue();
+ // Sign extend known zero/one bit (else is unknown).
+ Known.Zero.ashrInPlace(Shift);
+ Known.One.ashrInPlace(Shift);
}
break;
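// Illustrative sketch: ashrInPlace replicates a *known* sign bit and leaves
// an unknown one unknown, which subsumes the old explicit SignMask logic.
APInt KnownOne(8, 0x80); // sign bit known to be one
KnownOne.ashrInPlace(4);
assert(KnownOne == APInt(8, 0xF8)); // top five bits now known one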
case ISD::SIGN_EXTEND_INREG: {
@@ -2414,49 +2582,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::ZERO_EXTEND_VECTOR_INREG: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- Known = Known.trunc(InBits);
- computeKnownBits(Op.getOperand(0), Known,
- DemandedElts.zext(InVT.getVectorNumElements()),
- Depth + 1);
+ APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements());
+ computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBits);
+ Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBits);
+ Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
break;
}
// TODO ISD::SIGN_EXTEND_VECTOR_INREG
case ISD::SIGN_EXTEND: {
- EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
-
- Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
-
// If the sign bit is known to be zero or one, then sext will extend
// it to the top bits, else it will just zext.
Known = Known.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
- EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- Known = Known.trunc(InBits);
computeKnownBits(Op.getOperand(0), Known, Depth+1);
Known = Known.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
- EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- Known = Known.zext(InBits);
computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1);
Known = Known.trunc(BitWidth);
break;
@@ -2755,7 +2907,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
computeKnownBits(Op.getOperand(0), Known, DemandedElts,
Depth + 1);
// If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
break;
computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1);
Known.Zero &= Known2.Zero;
@@ -2764,11 +2916,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
}
case ISD::FrameIndex:
case ISD::TargetFrameIndex:
- if (unsigned Align = InferPtrAlignment(Op)) {
- // The low bits are known zero if the pointer is aligned.
- Known.Zero.setLowBits(Log2_32(Align));
- break;
- }
+ TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth);
break;
default:
@@ -2783,7 +2931,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
break;
}
- assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
}
SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0,
@@ -2873,12 +3021,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned Depth) const {
EVT VT = Op.getValueType();
- assert(VT.isInteger() && "Invalid VT!");
+ assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!");
unsigned VTBits = VT.getScalarSizeInBits();
unsigned NumElts = DemandedElts.getBitWidth();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
+ if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
+ const APInt &Val = C->getAPIntValue();
+ return Val.getNumSignBits();
+ }
+
if (Depth == 6)
return 1; // Limit search depth.
@@ -2894,11 +3047,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits();
return VTBits-Tmp;
- case ISD::Constant: {
- const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue();
- return Val.getNumSignBits();
- }
-
case ISD::BUILD_VECTOR:
Tmp = VTBits;
for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) {
@@ -2952,32 +3100,63 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
}
+ case ISD::BITCAST: {
+ SDValue N0 = Op.getOperand(0);
+ EVT SrcVT = N0.getValueType();
+ unsigned SrcBits = SrcVT.getScalarSizeInBits();
+
+    // Ignore bitcasts from unsupported types.
+ if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint()))
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ if (VTBits == SrcBits)
+ return ComputeNumSignBits(N0, DemandedElts, Depth + 1);
+
+ // Bitcast 'large element' scalar/vector to 'small element' vector.
+ // TODO: Handle cases other than 'sign splat' when we have a use case.
+ // Requires handling of DemandedElts and Endianness.
+ if ((SrcBits % VTBits) == 0) {
+ assert(Op.getValueType().isVector() && "Expected bitcast to vector");
+ Tmp = ComputeNumSignBits(N0, Depth + 1);
+ if (Tmp == SrcBits)
+ return VTBits;
+ }
+ break;
+ }
+
case ISD::SIGN_EXTEND:
- case ISD::SIGN_EXTEND_VECTOR_INREG:
Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
- return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
-
+ return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
// Max of the input and what this extends.
Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
Tmp = VTBits-Tmp+1;
-
- Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
return std::max(Tmp, Tmp2);
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements());
+ Tmp = VTBits - SrcVT.getScalarSizeInBits();
+ return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp;
+ }
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
- if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
+ if (ConstantSDNode *C =
+ isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
APInt ShiftVal = C->getAPIntValue();
ShiftVal += Tmp;
Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
return Tmp;
case ISD::SHL:
- if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
+ if (ConstantSDNode *C =
+ isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
// shl destroys sign bits.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
if (C->getAPIntValue().uge(VTBits) || // Bad shift.
C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
return Tmp - C->getZExtValue();
@@ -2987,9 +3166,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::OR:
case ISD::XOR: // NOT is handled here.
// Logical binary ops preserve the number of sign bits at the worst.
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
if (Tmp != 1) {
- Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
FirstAnswer = std::min(Tmp, Tmp2);
// We computed what we know about the sign bits as our first
// answer. Now proceed to the generic code that uses
@@ -2998,15 +3177,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::SELECT:
- Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1);
+ case ISD::VSELECT:
+ Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
return std::min(Tmp, Tmp2);
case ISD::SELECT_CC:
- Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1);
+ Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1);
if (Tmp == 1) return 1; // Early out.
- Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1);
+ Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1);
return std::min(Tmp, Tmp2);
+
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -3041,16 +3222,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::ROTL:
case ISD::ROTR:
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned RotAmt = C->getZExtValue() & (VTBits-1);
+ unsigned RotAmt = C->getAPIntValue().urem(VTBits);
// Handle rotate right by N like a rotate left by 32-N.
if (Op.getOpcode() == ISD::ROTR)
- RotAmt = (VTBits-RotAmt) & (VTBits-1);
+ RotAmt = (VTBits - RotAmt) % VTBits;
// If we aren't rotating out all of the known-in sign bits, return the
// number that are left. This handles rotl(sext(x), 1) for example.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (Tmp > RotAmt+1) return Tmp-RotAmt;
+ if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt);
}
break;
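// Illustrative sketch: urem also handles non-power-of-two widths that the
// old (VTBits-1) mask silently got wrong, e.g. rotating an i24 value by 25:
unsigned VTBits = 24;
uint64_t RotAmt = APInt(32, 25).urem(VTBits); // == 1; (25 & 23) gives 17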
case ISD::ADD:
@@ -3391,7 +3572,9 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
? DAG.getZExtOrTrunc(Op, DL, SVT)
: DAG.getSExtOrTrunc(Op, DL, SVT);
- return DAG.getBuildVector(VT, DL, Elts);
+ SDValue V = DAG.getBuildVector(VT, DL, Elts);
+ NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG);
+ return V;
}
/// Gets or creates the specified node.
@@ -3407,7 +3590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) {
CSEMap.InsertNode(N, IP);
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -3768,7 +3953,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
@@ -3906,18 +4093,31 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!");
EVT SVT = VT.getScalarType();
+ EVT LegalSVT = SVT;
+ if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) {
+ LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT);
+ if (LegalSVT.bitsLT(SVT))
+ return SDValue();
+ }
SmallVector<SDValue, 4> Outputs;
for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
SDValue V1 = BV1->getOperand(I);
SDValue V2 = BV2->getOperand(I);
- // Avoid BUILD_VECTOR nodes that perform implicit truncation.
- // FIXME: This is valid and could be handled by truncation.
+ if (SVT.isInteger()) {
+ if (V1->getValueType(0).bitsGT(SVT))
+ V1 = getNode(ISD::TRUNCATE, DL, SVT, V1);
+ if (V2->getValueType(0).bitsGT(SVT))
+ V2 = getNode(ISD::TRUNCATE, DL, SVT, V2);
+ }
+
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
return SDValue();
// Fold one vector element.
SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
+ if (LegalSVT != SVT)
+ ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult);
// Scalar folding only succeeded if the result is a constant or UNDEF.
if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
@@ -3936,6 +4136,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
return getBuildVector(VT, SDLoc(), Outputs);
}
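// Illustrative sketch: with NewNodesMustHaveLegalTypes set and i8 promoting
// to i32, each folded scalar is sign-extended so only legal constants are
// ever built:
APInt Folded(8, 0xFF);         // scalar fold result in the illegal type
APInt Legal = Folded.sext(32); // what the ISD::SIGN_EXTEND above produces
// Legal == 0xFFFFFFFF, i.e. i8 -1 becomes i32 -1.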
+// TODO: Merge with FoldConstantArithmetic
SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
@@ -4027,7 +4228,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
ScalarResults.push_back(ScalarResult);
}
- return getBuildVector(VT, DL, ScalarResults);
+ SDValue V = getBuildVector(VT, DL, ScalarResults);
+ NewSDValueDbgMsg(V, "New node fold constant vector: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4297,6 +4500,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2);
}
}
+
+ // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed
+ // when vector types are scalarized and v1iX is legal.
+    // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX, Idx)
+ if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N1.getValueType().getVectorNumElements() == 1) {
+ return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0),
+ N1.getOperand(1));
+ }
break;
case ISD::EXTRACT_ELEMENT:
assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!");
@@ -4518,7 +4730,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4553,8 +4767,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
// Vector constant folding.
SDValue Ops[] = {N1, N2, N3};
- if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops))
+ if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) {
+ NewSDValueDbgMsg(V, "New node vector constant folding: ", this);
return V;
+ }
break;
}
case ISD::SELECT:
@@ -4626,7 +4842,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V = SDValue(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
@@ -4882,8 +5100,8 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction()->optForMinSize();
- return MF.getFunction()->optForSize();
+ return MF.getFunction().optForMinSize();
+ return MF.getFunction().optForSize();
}
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
@@ -5558,21 +5776,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
- EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol,
- bool ReadMem, bool WriteMem, unsigned Size) {
+ EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
+ MachineMemOperand::Flags Flags, unsigned Size) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
- MachineFunction &MF = getMachineFunction();
- auto Flags = MachineMemOperand::MONone;
- if (WriteMem)
- Flags |= MachineMemOperand::MOStore;
- if (ReadMem)
- Flags |= MachineMemOperand::MOLoad;
- if (Vol)
- Flags |= MachineMemOperand::MOVolatile;
if (!Size)
Size = MemVT.getStoreSize();
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
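// Illustrative sketch of the new caller-side idiom: the old Vol/ReadMem/
// WriteMem booleans become one explicit flags value, so a volatile load-only
// intrinsic now passes
//   auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
// instead of (Vol=true, ReadMem=true, WriteMem=false).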
@@ -5597,6 +5809,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops);
+ ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>(
+ Opcode, dl.getIROrder(), VTList, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5622,7 +5836,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+ SelectionDAG &DAG, SDValue Ptr,
int64_t Offset = 0) {
// If this is FI+Offset, we can model it.
if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
@@ -5633,7 +5848,7 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
if (Ptr.getOpcode() != ISD::ADD ||
!isa<ConstantSDNode>(Ptr.getOperand(1)) ||
!isa<FrameIndexSDNode>(Ptr.getOperand(0)))
- return MachinePointerInfo();
+ return Info;
int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
return MachinePointerInfo::getFixedStack(
@@ -5645,14 +5860,15 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
-static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr,
+static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info,
+ SelectionDAG &DAG, SDValue Ptr,
SDValue OffsetOp) {
// If the 'Offset' value isn't a constant, we can't handle this.
if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
- return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue());
+ return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue());
if (OffsetOp.isUndef())
- return InferPointerInfo(DAG, Ptr);
- return MachinePointerInfo();
+ return InferPointerInfo(Info, DAG, Ptr);
+ return Info;
}
SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
@@ -5672,7 +5888,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
// If we don't have a PtrInfo, infer the trivial frame index case to simplify
// clients.
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(*this, Ptr, Offset);
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5791,7 +6007,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(*this, Ptr);
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -5841,7 +6057,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
assert((MMOFlags & MachineMemOperand::MOLoad) == 0);
if (PtrInfo.V.isNull())
- PtrInfo = InferPointerInfo(*this, Ptr);
+ PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -6118,7 +6334,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
@@ -6171,7 +6389,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
createOperands(N, Ops);
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL,
@@ -6580,14 +6800,16 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
bool IsUnary = false;
+ bool IsTernary = false;
switch (OrigOpc) {
-  default:
+  default:
+ default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
@@ -6614,10 +6836,14 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
SDNode *Res = nullptr;
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+ else if (IsTernary)
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+ Node->getOperand(2),
+ Node->getOperand(3)});
else
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2) });
-
+
// MorphNodeTo can operate in two ways: if an existing node with the
// specified operands exists, it can just return it. Otherwise, it
// updates the node in place to have the requested operands.
@@ -6630,7 +6856,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
RemoveDeadNode(Node);
}
- return Res;
+ return Res;
}
/// getMachineNode - These are used for target selectors to create a new node
@@ -6794,32 +7020,125 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
/// getDbgValue - Creates a SDDbgValue node.
///
/// SDNode
-SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N,
- unsigned R, bool IsIndirect, uint64_t Off,
+SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr,
+ SDNode *N, unsigned R, bool IsIndirect,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
return new (DbgInfo->getAlloc())
- SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O);
+ SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O);
}
/// Constant
-SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr,
- const Value *C, uint64_t Off,
+SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var,
+ DIExpression *Expr,
+ const Value *C,
const DebugLoc &DL, unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O);
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O);
}
/// FrameIndex
-SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr,
- unsigned FI, uint64_t Off,
+SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var,
+ DIExpression *Expr, unsigned FI,
const DebugLoc &DL,
unsigned O) {
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
- return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O);
+ return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O);
+}
+
+void SelectionDAG::transferDbgValues(SDValue From, SDValue To,
+ unsigned OffsetInBits, unsigned SizeInBits,
+ bool InvalidateDbg) {
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ assert(FromNode && ToNode && "Can't modify dbg values");
+
+ // PR35338
+ // TODO: assert(From != To && "Redundant dbg value transfer");
+ // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer");
+ if (From == To || FromNode == ToNode)
+ return;
+
+ if (!FromNode->getHasDebugValue())
+ return;
+
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (SDDbgValue *Dbg : GetDbgValues(FromNode)) {
+ if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
+ continue;
+
+ // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value");
+
+ // Just transfer the dbg value attached to From.
+ if (Dbg->getResNo() != From.getResNo())
+ continue;
+
+ DIVariable *Var = Dbg->getVariable();
+ auto *Expr = Dbg->getExpression();
+ // If a fragment is requested, update the expression.
+ if (SizeInBits) {
+ // When splitting a larger (e.g., sign-extended) value whose
+ // lower bits are described with an SDDbgValue, do not attempt
+ // to transfer the SDDbgValue to the upper bits.
+ if (auto FI = Expr->getFragmentInfo())
+ if (OffsetInBits + SizeInBits > FI->SizeInBits)
+ continue;
+ auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits,
+ SizeInBits);
+ if (!Fragment)
+ continue;
+ Expr = *Fragment;
+ }
+ // Clone the SDDbgValue and move it to To.
+ SDDbgValue *Clone =
+ getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(),
+ Dbg->getDebugLoc(), Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+
+ if (InvalidateDbg)
+ Dbg->setIsInvalidated();
+ }
+
+ for (SDDbgValue *Dbg : ClonedDVs)
+ AddDbgValue(Dbg, ToNode, false);
+}
+
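// Illustrative sketch of a fragment transfer (names assumed): when only the
// low 32 bits of a replaced 64-bit value survive, its debug values follow
// that fragment:
//   DAG.transferDbgValues(OldVal64, NewLo32, /*OffsetInBits=*/0,
//                         /*SizeInBits=*/32);
// With SizeInBits == 0, as in the ReplaceAllUsesWith callers below, the
// expression is cloned unchanged.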
+void SelectionDAG::salvageDebugInfo(SDNode &N) {
+ if (!N.getHasDebugValue())
+ return;
+ for (auto DV : GetDbgValues(&N)) {
+ if (DV->isInvalidated())
+ continue;
+ switch (N.getOpcode()) {
+ default:
+ break;
+ case ISD::ADD:
+ SDValue N0 = N.getOperand(0);
+ SDValue N1 = N.getOperand(1);
+ if (!isConstantIntBuildVectorOrConstantInt(N0) &&
+ isConstantIntBuildVectorOrConstantInt(N1)) {
+ uint64_t Offset = N.getConstantOperandVal(1);
+ // Rewrite an ADD constant node into a DIExpression. Since we are
+ // performing arithmetic to compute the variable's *value* in the
+ // DIExpression, we need to mark the expression with a
+ // DW_OP_stack_value.
+ auto *DIExpr = DV->getExpression();
+ DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
+ DIExpression::NoDeref,
+ DIExpression::WithStackValue);
+ SDDbgValue *Clone =
+ getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
+ DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
+ DV->setIsInvalidated();
+ AddDbgValue(Clone, N0.getNode(), false);
+ DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this);
+ dbgs() << " into " << *DIExpr << '\n');
+ }
+ }
+ }
}
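// Worked example (IR names assumed): if %n = add i64 %x, 8 carries a
// dbg.value and %n becomes dead, the location is rewritten onto %x as
//   !DIExpression(DW_OP_plus_uconst, 8, DW_OP_stack_value)
// so the debugger can still recompute the variable's value from %x.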
namespace {
@@ -6859,7 +7178,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
assert(From != To.getNode() && "Cannot replace uses of with self");
// Preserve Debug Values
- TransferDbgValues(FromN, To);
+ transferDbgValues(FromN, To);
// Iterate over all the existing uses of From. New uses will be added
// to the beginning of the use list, which we avoid visiting.
@@ -6918,7 +7237,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) {
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
if (From->hasAnyUseOfValue(i)) {
assert((i < To->getNumValues()) && "Invalid To location");
- TransferDbgValues(SDValue(From, i), SDValue(To, i));
+ transferDbgValues(SDValue(From, i), SDValue(To, i));
}
// Iterate over just the existing users of From. See the comments in
@@ -6962,7 +7281,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) {
// Preserve Debug Info.
for (unsigned i = 0, e = From->getNumValues(); i != e; ++i)
- TransferDbgValues(SDValue(From, i), *To);
+ transferDbgValues(SDValue(From, i), *To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -7009,7 +7328,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
}
// Preserve Debug Info.
- TransferDbgValues(From, To);
+ transferDbgValues(From, To);
// Iterate over just the existing users of From. See the comments in
// the ReplaceAllUsesWith above.
@@ -7087,7 +7406,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
if (Num == 1)
return ReplaceAllUsesOfValueWith(*From, *To);
- TransferDbgValues(*From, *To);
+ transferDbgValues(*From, *To);
// Read up all the uses and make records of them. This helps
// processing new uses that are introduced during the
@@ -7236,35 +7555,6 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
DbgInfo->add(DB, SD, isParameter);
}
-/// TransferDbgValues - Transfer SDDbgValues. Called in replace nodes.
-void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
- if (From == To || !From.getNode()->getHasDebugValue())
- return;
- SDNode *FromNode = From.getNode();
- SDNode *ToNode = To.getNode();
- ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
- SmallVector<SDDbgValue *, 2> ClonedDVs;
- for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
- I != E; ++I) {
- SDDbgValue *Dbg = *I;
- // Only add Dbgvalues attached to same ResNo.
- if (Dbg->getKind() == SDDbgValue::SDNODE &&
- Dbg->getSDNode() == From.getNode() &&
- Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) {
- assert(FromNode != ToNode &&
- "Should not transfer Debug Values intranode");
- SDDbgValue *Clone =
- getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode,
- To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(),
- Dbg->getDebugLoc(), Dbg->getOrder());
- ClonedDVs.push_back(Clone);
- Dbg->setIsInvalidated();
- }
- }
- for (SDDbgValue *I : ClonedDVs)
- AddDbgValue(I, ToNode, false);
-}
-
SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
SDValue NewMemOp) {
assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 0d69441ebb7f7..544da362be698 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -1,5 +1,4 @@
-//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp ------- DAG Address
-//Analysis ---*- C++ -*-===//
+//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
//
// The LLVM Compiler Infrastructure
//
@@ -7,15 +6,18 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Support/Casting.h"
+#include <cstdint>
-namespace llvm {
+using namespace llvm;
bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
const SelectionDAG &DAG, int64_t &Off) {
@@ -55,7 +57,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other,
/// Parses tree in Ptr for base, index, offset addresses.
BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
// (((B + I*M) + c)) + c ...
- SDValue Base = Ptr;
+ SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr);
SDValue Index = SDValue();
int64_t Offset = 0;
bool IsIndexSignExt = false;
@@ -112,4 +114,3 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) {
}
return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
}
-} // end namespace llvm
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 127312076207c..71cb8cb78f6d3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
+//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,63 +12,113 @@
//===----------------------------------------------------------------------===//
#include "SelectionDAGBuilder.h"
-#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
-#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
-#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetFrameLowering.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <limits>
+#include <numeric>
+#include <tuple>
#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "isel"
@@ -78,11 +128,18 @@ using namespace llvm;
static unsigned LimitFloatPrecision;
static cl::opt<unsigned, true>
-LimitFPPrecision("limit-float-precision",
- cl::desc("Generate low-precision inline sequences "
- "for some float libcalls"),
- cl::location(LimitFloatPrecision),
- cl::init(0));
+ LimitFPPrecision("limit-float-precision",
+ cl::desc("Generate low-precision inline sequences "
+ "for some float libcalls"),
+ cl::location(LimitFloatPrecision), cl::Hidden,
+ cl::init(0));
+
+static cl::opt<unsigned> SwitchPeelThreshold(
+ "switch-peel-threshold", cl::Hidden, cl::init(66),
+ cl::desc("Set the case probability threshold for peeling the case from a "
+             "switch statement. A value greater than 100 will disable this "
+ "optimization"));
+
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
@@ -101,7 +158,7 @@ static const unsigned MaxParallelChains = 64;
// True if the Value passed requires ABI mangling as it is a parameter to a
// function or a return value from a function which is not an intrinsic.
-static bool isABIRegCopy(const Value * V) {
+static bool isABIRegCopy(const Value *V) {
const bool IsRetInst = V && isa<ReturnInst>(V);
const bool IsCallInst = V && isa<CallInst>(V);
const bool IsInLineAsm =
@@ -554,7 +611,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
SDValue Val, SDValue *Parts, unsigned NumParts,
MVT PartVT, const Value *V,
bool IsABIRegCopy) {
-
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -600,7 +656,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
Val = DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
-
} else {
assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
"lossy conversion of vector to scalar type");
@@ -677,8 +732,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
}
}
-RegsForValue::RegsForValue() { IsABIMangled = false; }
-
RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
EVT valuevt, bool IsABIMangledValue)
: ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
@@ -888,7 +941,24 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
Ops.push_back(Res);
- unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ if (Code == InlineAsm::Kind_Clobber) {
+ // Clobbers should always have a 1:1 mapping with registers, and may
+ // reference registers that have illegal (e.g. vector) types. Hence, we
+ // shouldn't try to apply any sort of splitting logic to them.
+ assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
+ "No 1:1 mapping from clobbers to regs?");
+ unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
+ (void)SP;
+ for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
+ Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
+ assert(
+ (Regs[I] != SP ||
+ DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
+ "If we clobbered the stack pointer, MFI should know about it.");
+ }
+ return;
+ }
+
for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
MVT RegisterVT = RegVTs[Value];
@@ -896,11 +966,6 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
assert(Reg < Regs.size() && "Mismatch in # registers expected");
unsigned TheReg = Regs[Reg++];
Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
-
- if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
- // If we clobbered the stack pointer, MFI should know about it.
- assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment());
- }
}
}
}
@@ -1025,12 +1090,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
DIExpression *Expr = DI->getExpression();
assert(Variable->isValidLocationForIntrinsic(dl) &&
"Expected inlined-at fields to agree");
- uint64_t Offset = DI->getOffset();
SDDbgValue *SDV;
if (Val.getNode()) {
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
- Val)) {
- SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder);
+ if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
+ SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1409,7 +1472,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// Leave Outs empty so that LowerReturn won't try to load return
// registers the usual way.
SmallVector<EVT, 1> PtrValueVTs;
- ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
+ ComputeValueVTs(TLI, DL,
+ F->getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
PtrValueVTs);
SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
@@ -1421,22 +1486,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
- // An aggregate return value cannot wrap around the address space, so
- // offsets to its parts don't wrap either.
- SDNodeFlags Flags;
- Flags.setNoUnsignedWrap(true);
-
SmallVector<SDValue, 4> Chains(NumValues);
for (unsigned i = 0; i != NumValues; ++i) {
- SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
- RetPtr.getValueType(), RetPtr,
- DAG.getIntPtrConstant(Offsets[i],
- getCurSDLoc()),
- Flags);
- Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
- SDValue(RetOp.getNode(), RetOp.getResNo() + i),
- // FIXME: better loc info would be nice.
- Add, MachinePointerInfo());
+ // An aggregate return value cannot wrap around the address space, so
+ // offsets to its parts don't wrap either.
+ SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
+ Chains[i] = DAG.getStore(
+ Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ // FIXME: better loc info would be nice.
+ Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
}
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
@@ -1515,9 +1573,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
EVT(TLI.getPointerTy(DL))));
}
- bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
+ bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
CallingConv::ID CallConv =
- DAG.getMachineFunction().getFunction()->getCallingConv();
+ DAG.getMachineFunction().getFunction().getCallingConv();
Chain = DAG.getTargetLoweringInfo().LowerReturn(
Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
@@ -1623,7 +1681,6 @@ static bool InBlock(const Value *V, const BasicBlock *BB) {
/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
/// This function emits a branch and is used at the leaves of an OR or an
/// AND operator tree.
-///
void
SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
MachineBasicBlock *TBB,
@@ -1659,7 +1716,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
}
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
- TBB, FBB, CurBB, TProb, FProb);
+ TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SwitchCases.push_back(CB);
return;
}
@@ -1668,7 +1725,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
// Create a CaseBlock record representing this branch.
ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
- nullptr, TBB, FBB, CurBB, TProb, FProb);
+ nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
SwitchCases.push_back(CB);
}
@@ -1712,7 +1769,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
// If this node is not part of the or/and tree, emit it as a branch.
if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
- BOpc != Opc || !BOp->hasOneUse() ||
+ BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
BOp->getParent() != CurBB->getBasicBlock() ||
!InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
!InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
@@ -1867,7 +1924,6 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// je foo
// cmp D, E
// jle foo
- //
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
@@ -1907,7 +1963,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// Create a CaseBlock record representing this branch.
CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
- nullptr, Succ0MBB, Succ1MBB, BrMBB);
+ nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
// Use visitSwitchCase to actually insert the fast branch sequence for this
// cond branch.
@@ -1920,7 +1976,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB) {
SDValue Cond;
SDValue CondLHS = getValue(CB.CmpLHS);
- SDLoc dl = getCurSDLoc();
+ SDLoc dl = CB.DL;
// Build the setcc now.
if (!CB.CmpMHS) {
@@ -2054,7 +2110,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
+ Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
MachineSDNode *Node =
DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
if (Global) {
@@ -2088,15 +2144,18 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
SDValue Guard;
SDLoc dl = getCurSDLoc();
SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
- const Module &M = *ParentBB->getParent()->getFunction()->getParent();
+ const Module &M = *ParentBB->getParent()->getFunction().getParent();
unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
// Generate code to load the content of the guard slot.
- SDValue StackSlot = DAG.getLoad(
+ SDValue GuardVal = DAG.getLoad(
PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
MachineMemOperand::MOVolatile);
+ if (TLI.useStackGuardXorFP())
+ GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
+
// Retrieve guard check function, nullptr if instrumentation is inlined.
if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
// The target provides a guard check function to validate the guard value.
@@ -2108,7 +2167,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Node = StackSlot;
+ Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
@@ -2141,7 +2200,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// Perform the comparison via a subtract/getsetcc.
EVT VT = Guard.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
*DAG.getContext(),
@@ -2151,7 +2210,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// If the sub is not 0, then we know the guard/stackslot do not equal, so
// branch to failure MBB.
SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, StackSlot.getOperand(0),
+ MVT::Other, GuardVal.getOperand(0),
Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
// Otherwise branch to success MBB.
SDValue Br = DAG.getNode(ISD::BR, dl,
@@ -2530,7 +2589,7 @@ static bool isVectorReductionOp(const User *I) {
case Instruction::FAdd:
case Instruction::FMul:
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
- if (FPOp->getFastMathFlags().unsafeAlgebra())
+ if (FPOp->getFastMathFlags().isFast())
break;
LLVM_FALLTHROUGH;
default:
@@ -2576,7 +2635,7 @@ static bool isVectorReductionOp(const User *I) {
if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
- if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra())
+ if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
return false;
UsersToVisit.push_back(U);
} else if (const ShuffleVectorInst *ShufInst =
@@ -2670,7 +2729,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
Flags.setNoInfs(FMF.noInfs());
Flags.setNoNaNs(FMF.noNaNs());
Flags.setNoSignedZeros(FMF.noSignedZeros());
- Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
+ Flags.setUnsafeAlgebra(FMF.isFast());
SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
Op1, Op2, Flags);
@@ -2779,7 +2838,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
// Check if the condition of the select has one use or two users that are both
// selects with the same condition.
static bool hasOnlySelectUsers(const Value *Cond) {
- return all_of(Cond->users(), [](const Value *V) {
+ return llvm::all_of(Cond->users(), [](const Value *V) {
return isa<SelectInst>(V);
});
}
@@ -3447,7 +3506,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
SDValue AllocSize = getValue(I.getArraySize());
- EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
+ EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
if (AllocSize.getValueType() != IntPtr)
AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
@@ -3468,17 +3527,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// an address inside an alloca.
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
- AllocSize = DAG.getNode(ISD::ADD, dl,
- AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(StackAlign - 1, dl), Flags);
+ AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
// Mask out the low bits for alignment purposes.
- AllocSize = DAG.getNode(ISD::AND, dl,
- AllocSize.getValueType(), AllocSize,
- DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1),
- dl));
+ AllocSize =
+ DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
+ DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
- SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) };
+ SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
setValue(&I, DSA);
@@ -3807,18 +3864,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
//
// When the first GEP operand is a single pointer - it is the uniform base we
// are looking for. If the first operand of the GEP is a splat vector - we
-// extract the spalt value and use it as a uniform base.
+// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
-//
static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
SelectionDAGBuilder* SDB) {
-
SelectionDAG& DAG = SDB->DAG;
LLVMContext &Context = *DAG.getContext();
  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
- if (!GEP || GEP->getNumOperands() > 2)
+ if (!GEP)
return false;
const Value *GEPPtr = GEP->getPointerOperand();
@@ -3827,7 +3882,15 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
else if (!(Ptr = getSplatValue(GEPPtr)))
return false;
- Value *IndexVal = GEP->getOperand(1);
+ unsigned FinalIndex = GEP->getNumOperands() - 1;
+ Value *IndexVal = GEP->getOperand(FinalIndex);
+
+ // Ensure all the other indices are 0.
+ for (unsigned i = 1; i < FinalIndex; ++i) {
+ auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!C || !C->isZero())
+ return false;
+ }
// The operands of the GEP may be defined in another basic block.
// In this case we'll not find nodes for the operands.
@@ -3837,13 +3900,6 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
Base = SDB->getValue(Ptr);
Index = SDB->getValue(IndexVal);
- // Suppress sign extension.
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
- if (SDB->findValue(Sext->getOperand(0))) {
- IndexVal = Sext->getOperand(0);
- Index = SDB->getValue(IndexVal);
- }
- }
if (!Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
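
The hunk above relaxes the old restriction that rejected any GEP with more
than two operands: a longer GEP can still produce a uniform base, as long as
every index before the last is a constant zero. A standalone model of that
predicate, sketched over plain flags rather than the LLVM IR types (names
are illustrative, not part of the patch):

#include <vector>

// One entry per GEP index; IsConstZero models "ConstantInt with value 0".
struct IndexInfo { bool IsConstZero; };

// Mirrors the "Ensure all the other indices are 0" loop in the hunk: only
// the final index may vary, since it becomes the vector of offsets.
bool onlyLastIndexMayVary(const std::vector<IndexInfo> &Indices) {
  for (size_t i = 0; i + 1 < Indices.size(); ++i)
    if (!Indices[i].IsConstZero)
      return false;
  return !Indices.empty();
}
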
@@ -4082,7 +4138,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- if (I.getAlignment() < VT.getSizeInBits() / 8)
+ if (!TLI.supportsUnalignedAtomics() &&
+ I.getAlignment() < VT.getStoreSize())
report_fatal_error("Cannot generate unaligned atomic load");
MachineMemOperand *MMO =
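
The switch from VT.getSizeInBits() / 8 to VT.getStoreSize() matters for
value types whose bit width is not a byte multiple: the store size rounds up
to whole bytes, while integer division truncates. A standalone sketch of the
difference (illustrative numbers, not LLVM API calls):

#include <cassert>
#include <cstdint>

// EVT::getStoreSize() is the bit width rounded up to whole bytes.
uint64_t storeSizeInBytes(uint64_t Bits) { return (Bits + 7) / 8; }

int main() {
  // For a 17-bit type, getSizeInBits()/8 truncates to 2 bytes, but the
  // store actually touches 3; the old check could therefore accept an
  // under-aligned atomic access.
  assert(17 / 8 == 2);
  assert(storeSizeInBytes(17) == 3);
  return 0;
}
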
@@ -4118,7 +4175,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
EVT VT =
TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
- if (I.getAlignment() < VT.getSizeInBits() / 8)
+ if (I.getAlignment() < VT.getStoreSize())
report_fatal_error("Cannot generate unaligned atomic store");
SDValue OutChain =
@@ -4157,7 +4214,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
  // Info is set by getTgtMemIntrinsic
TargetLowering::IntrinsicInfo Info;
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+ bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
+ DAG.getMachineFunction(),
+ Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
@@ -4183,11 +4242,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDValue Result;
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
- Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
- VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.vol,
- Info.readMem, Info.writeMem, Info.size);
+ Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
+ Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
+ Info.flags, Info.size);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4370,7 +4428,6 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
-
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4469,7 +4526,6 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
-
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4567,7 +4623,6 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
/// limited-precision mode.
static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
const TargetLowering &TLI) {
-
// TODO: What fast-math-flags should be set on the floating-point nodes?
if (Op.getValueType() == MVT::f32 &&
@@ -4695,7 +4750,6 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
}
-
/// ExpandPowI - Expand a llvm.powi intrinsic.
static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
SelectionDAG &DAG) {
@@ -4712,8 +4766,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
if (Val == 0)
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
- const Function *F = DAG.getMachineFunction().getFunction();
- if (!F->optForSize() ||
+ const Function &F = DAG.getMachineFunction().getFunction();
+ if (!F.optForSize() ||
// If optimizing for size, don't insert too many multiplies.
// This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
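
The heuristic above expands powi inline even under optForSize when
countPopulation(Val) + Log2_32(Val) < 7, which caps the number of multiplies
emitted by square-and-multiply. A standalone check of that bound, using
C++20 <bit> as a stand-in for the LLVM helpers (a sketch, not the patch's
code):

#include <bit>      // std::popcount, std::countl_zero (C++20)
#include <cstdint>
#include <cstdio>

// Square-and-multiply needs one squaring per bit position past the first,
// plus one multiply per extra set bit; popcount + floor(log2) closely
// over-approximates that count.
unsigned mulCount(uint32_t Val) {
  unsigned Log2 = 31 - std::countl_zero(Val); // floor(log2(Val)), Val > 0
  return std::popcount(Val) + Log2;
}

int main() {
  // Val = 16 (10000b): popcount 1 + log2 4 = 5 < 7  -> expanded inline.
  std::printf("16 -> %u\n", mulCount(16));
  // Val = 23 (10111b): popcount 4 + log2 4 = 8 >= 7 -> libcall when
  // optimizing for size.
  std::printf("23 -> %u\n", mulCount(23));
  return 0;
}
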
@@ -4766,12 +4820,12 @@ static unsigned getUnderlyingArgReg(const SDValue &N) {
}
}
-/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
-/// argument, create the corresponding DBG_VALUE machine instruction for it now.
-/// At the end of instruction selection, they will be inserted to the entry BB.
+/// If the DbgValueInst is a dbg_value of a function argument, create the
+/// corresponding DBG_VALUE machine instruction for it now. At the end of
+/// instruction selection, they will be inserted to the entry BB.
bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
const Value *V, DILocalVariable *Variable, DIExpression *Expr,
- DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) {
+ DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
@@ -4779,17 +4833,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
- // Ignore inlined function arguments here.
- //
- // FIXME: Should we be checking DL->inlinedAt() to determine this?
- if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
- return false;
-
bool IsIndirect = false;
Optional<MachineOperand> Op;
// Some arguments' frame index is recorded during argument lowering.
int FI = FuncInfo.getArgumentFrameIndex(Arg);
- if (FI != INT_MAX)
+ if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
if (!Op && N.getNode()) {
@@ -4806,22 +4854,48 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
}
+ if (!Op && N.getNode())
+ // Check if frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+ Op = MachineOperand::CreateFI(FINode->getIndex());
+
if (!Op) {
// Check if ValueMap has reg number.
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
+ V->getType(), isABIRegCopy(V));
+ unsigned NumRegs =
+ std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
+ if (NumRegs > 1) {
+ unsigned I = 0;
+ unsigned Offset = 0;
+ auto RegisterVT = RFV.RegVTs.begin();
+ for (auto RegCount : RFV.RegCount) {
+ unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
+ for (unsigned E = I + RegCount; I != E; ++I) {
+ // The vregs are guaranteed to be allocated in sequence.
+ Op = MachineOperand::CreateReg(VMI->second + I, false);
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegisterSize);
+ if (!FragmentExpr)
+ continue;
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
+ Op->getReg(), Variable, *FragmentExpr));
+ Offset += RegisterSize;
+ }
+ }
+ return true;
+ }
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = IsDbgDeclare;
}
}
- if (!Op && N.getNode())
- // Check if frame index is available.
- if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
- if (FrameIndexSDNode *FINode =
- dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- Op = MachineOperand::CreateFI(FINode->getIndex());
-
if (!Op)
return false;
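
The new multi-register path above emits one DBG_VALUE per virtual register,
each carrying a DIExpression fragment whose bit offset advances by that
register's size. The offset walk, extracted into a standalone sketch (the
register widths are illustrative):

#include <cstdio>
#include <vector>

// Given the per-register bit widths of a value split across consecutive
// virtual registers, print the (offset, size) fragment each register would
// describe -- the same walk the hunk performs with
// DIExpression::createFragmentExpression.
int main() {
  std::vector<unsigned> RegisterBits = {64, 64, 32}; // e.g. an i160 value
  unsigned Offset = 0;
  for (unsigned Bits : RegisterBits) {
    std::printf("fragment at bit %u, size %u\n", Offset, Bits);
    Offset += Bits;
  }
  return 0;
}
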
@@ -4830,12 +4904,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (Op->isReg())
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
- Op->getReg(), Offset, Variable, Expr));
+ Op->getReg(), Variable, Expr));
else
FuncInfo.ArgDbgValues.push_back(
BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.add(*Op)
- .addImm(Offset)
+ .addImm(0)
.addMetadata(Variable)
.addMetadata(Expr));
@@ -4845,18 +4919,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
DILocalVariable *Variable,
- DIExpression *Expr, int64_t Offset,
+ DIExpression *Expr,
const DebugLoc &dl,
unsigned DbgSDNodeOrder) {
if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
// Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
// stack slot locations as such instead of as indirectly addressed
// locations.
- return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl,
+ return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
DbgSDNodeOrder);
}
- return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
- Offset, dl, DbgSDNodeOrder);
+ return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
+ DbgSDNodeOrder);
}
// VisualStudio defines setjmp as _setjmp
@@ -4971,8 +5045,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memcpy_element_unordered_atomic: {
- const ElementUnorderedAtomicMemCpyInst &MI =
- cast<ElementUnorderedAtomicMemCpyInst>(I);
+ const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
@@ -5010,7 +5083,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memmove_element_unordered_atomic: {
- auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I);
+ auto &MI = cast<AtomicMemMoveInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Src = getValue(MI.getRawSource());
SDValue Length = getValue(MI.getLength());
@@ -5048,7 +5121,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
}
case Intrinsic::memset_element_unordered_atomic: {
- auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
+ auto &MI = cast<AtomicMemSetInst>(I);
SDValue Dst = getValue(MI.getRawDest());
SDValue Val = getValue(MI.getValue());
SDValue Length = getValue(MI.getLength());
@@ -5086,30 +5159,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(CallResult.second);
return nullptr;
}
+ case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
- const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+ const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
- const Value *Address = DI.getAddress();
assert(Variable && "Missing variable");
- if (!Address) {
- DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- return nullptr;
- }
// Check if address has undef value.
- if (isa<UndefValue>(Address) ||
+ const Value *Address = DI.getVariableLocation();
+ if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
return nullptr;
}
- // Byval arguments with frame indices were already handled after argument
- // lowering and before isel.
- const auto *Arg =
- dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
- if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+ bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+
+ // Check if this variable can be described by a frame index, typically
+ // either as a static alloca or a byval parameter.
+ int FI = std::numeric_limits<int>::max();
+ if (const auto *AI =
+ dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
+ if (AI->isStaticAlloca()) {
+ auto I = FuncInfo.StaticAllocaMap.find(AI);
+ if (I != FuncInfo.StaticAllocaMap.end())
+ FI = I->second;
+ }
+ } else if (const auto *Arg = dyn_cast<Argument>(
+ Address->stripInBoundsConstantOffsets())) {
+ FI = FuncInfo.getArgumentFrameIndex(Arg);
+ }
+
+  // llvm.dbg.addr is control-dependent and always generates indirect
+ // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
+ // the MachineFunction variable table.
+ if (FI != std::numeric_limits<int>::max()) {
+ if (Intrinsic == Intrinsic::dbg_addr)
+ DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl,
+ SDNodeOrder),
+ getRoot().getNode(), isParameter);
return nullptr;
+ }
SDValue &N = NodeMap[Address];
if (!N.getNode() && isa<Argument>(Address))
@@ -5120,26 +5211,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
// Parameters are handled specially.
- bool isParameter = Variable->isParameter() || isa<Argument>(Address);
auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
if (isParameter && FINode) {
// Byval parameter. We have a frame index at this point.
SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
- FINode->getIndex(), 0, dl, SDNodeOrder);
+ FINode->getIndex(), dl, SDNodeOrder);
} else if (isa<Argument>(Address)) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N);
+ EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
return nullptr;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
- true, 0, dl, SDNodeOrder);
+ true, dl, SDNodeOrder);
}
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
// If Address is an argument then try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
- if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true,
+ if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
N)) {
DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
@@ -5152,15 +5242,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DILocalVariable *Variable = DI.getVariable();
DIExpression *Expression = DI.getExpression();
- uint64_t Offset = DI.getOffset();
const Value *V = DI.getValue();
if (!V)
return nullptr;
SDDbgValue *SDV;
if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
- SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl,
- SDNodeOrder);
+ SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, nullptr, false);
return nullptr;
}
@@ -5171,10 +5259,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
N = UnusedArgNodeMap[V];
if (N.getNode()) {
- if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false,
- N))
+ if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
return nullptr;
- SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder);
+ SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
return nullptr;
}
@@ -5213,12 +5300,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().setCallsUnwindInit(true);
return nullptr;
- case Intrinsic::eh_dwarf_cfa: {
+ case Intrinsic::eh_dwarf_cfa:
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
- }
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
@@ -5247,17 +5333,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Op.getValue(1));
return nullptr;
}
- case Intrinsic::eh_sjlj_longjmp: {
+ case Intrinsic::eh_sjlj_longjmp:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
return nullptr;
- }
- case Intrinsic::eh_sjlj_setup_dispatch: {
+ case Intrinsic::eh_sjlj_setup_dispatch:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
getRoot()));
return nullptr;
- }
-
case Intrinsic::masked_gather:
visitMaskedGather(I);
return nullptr;
@@ -5430,6 +5513,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@@ -5534,11 +5618,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Res.getValue(1));
return nullptr;
}
- case Intrinsic::stackrestore: {
+ case Intrinsic::stackrestore:
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
return nullptr;
- }
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
@@ -5557,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::stackguard: {
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
- const Module &M = *MF.getFunction()->getParent();
+ const Module &M = *MF.getFunction().getParent();
SDValue Chain = getRoot();
if (TLI.useLoadStackGuardNode()) {
Res = getLoadStackGuard(DAG, sdl, Chain);
@@ -5568,6 +5651,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachinePointerInfo(Global, 0), Align,
MachineMemOperand::MOVolatile);
}
+ if (TLI.useStackGuardXorFP())
+ Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
DAG.setRoot(Chain);
setValue(&I, Res);
return nullptr;
@@ -5624,9 +5709,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return nullptr;
case Intrinsic::assume:
case Intrinsic::var_annotation:
- // Discard annotate attributes and assumptions
+ case Intrinsic::sideeffect:
+ // Discard annotate attributes, assumptions, and artificial side-effects.
return nullptr;
+ case Intrinsic::codeview_annotation: {
+ // Emit a label associated with this metadata.
+ MachineFunction &MF = DAG.getMachineFunction();
+ MCSymbol *Label =
+ MF.getMMI().getContext().createTempSymbol("annotation", true);
+ Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+ MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
+ Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
+ DAG.setRoot(Res);
+ return nullptr;
+ }
+
case Intrinsic::init_trampoline: {
const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
@@ -5643,17 +5741,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.setRoot(Res);
return nullptr;
}
- case Intrinsic::adjust_trampoline: {
+ case Intrinsic::adjust_trampoline:
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
- }
case Intrinsic::gcroot: {
- MachineFunction &MF = DAG.getMachineFunction();
- const Function *F = MF.getFunction();
- (void)F;
- assert(F->hasGC() &&
+ assert(DAG.getMachineFunction().getFunction().hasGC() &&
"only valid in functions with gc specified, enforced by Verifier");
assert(GFI && "implied by previous");
const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
@@ -5670,11 +5764,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
return nullptr;
- case Intrinsic::expect: {
+ case Intrinsic::expect:
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
return nullptr;
- }
case Intrinsic::debugtrap:
case Intrinsic::trap: {
@@ -5728,6 +5821,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::prefetch: {
SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+    auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
@@ -5738,9 +5832,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
EVT::getIntegerVT(*Context, 8),
MachinePointerInfo(I.getArgOperand(0)),
0, /* align */
- false, /* volatile */
- rw==0, /* read */
- rw==1)); /* write */
+ Flags));
return nullptr;
}
case Intrinsic::lifetime_start:
@@ -5792,27 +5884,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::donothing:
// ignore
return nullptr;
- case Intrinsic::experimental_stackmap: {
+ case Intrinsic::experimental_stackmap:
visitStackmap(I);
return nullptr;
- }
case Intrinsic::experimental_patchpoint_void:
- case Intrinsic::experimental_patchpoint_i64: {
+ case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(&I);
return nullptr;
- }
- case Intrinsic::experimental_gc_statepoint: {
+ case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(ImmutableStatepoint(&I));
return nullptr;
- }
- case Intrinsic::experimental_gc_result: {
+ case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
return nullptr;
- }
- case Intrinsic::experimental_gc_relocate: {
+ case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
return nullptr;
- }
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
@@ -5851,7 +5938,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Get the symbol that defines the frame offset.
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
- unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
+ unsigned IdxVal =
+ unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
MCSymbol *FrameAllocSym =
MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
@@ -5932,12 +6020,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin: {
+ case Intrinsic::experimental_vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
return nullptr;
}
-
- }
}
void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
@@ -5961,6 +6047,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_frem:
Opcode = ISD::STRICT_FREM;
break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
@@ -6007,10 +6096,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
if (FPI.isUnaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
+ else if (FPI.isTernaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)),
+ getValue(FPI.getArgOperand(2)) });
else
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
@@ -6081,7 +6175,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
if (MF.hasEHFunclets()) {
assert(CLI.CS);
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
- EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
+ EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
BeginLabel, EndLabel);
} else {
MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
@@ -6189,7 +6283,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SelectionDAGBuilder &Builder) {
-
// Check to see if this load can be trivially constant folded, e.g. if the
// input is from a string literal.
if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
@@ -6553,10 +6646,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
// Check for well-known libc/libm calls. If the function is internal, it
// can't be a library call. Don't do the check if marked as nobuiltin for
- // some reason.
+ // some reason or the call site requires strict floating point semantics.
LibFunc Func;
- if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
- LibInfo->getLibFunc(*F, Func) &&
+ if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
+ F->hasName() && LibInfo->getLibFunc(*F, Func) &&
LibInfo->hasOptimizedCodeGen(Func)) {
switch (Func) {
default: break;
@@ -6735,7 +6828,7 @@ public:
RegsForValue AssignedRegs;
explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
- : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
+ : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
}
/// Whether or not this operand accesses memory
@@ -6767,7 +6860,7 @@ public:
// If this is an indirect operand, the operand is a pointer to the
// accessed type.
if (isIndirect) {
- llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
if (!PtrTy)
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
@@ -6799,7 +6892,7 @@ public:
}
};
-typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
} // end anonymous namespace
@@ -6879,7 +6972,6 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
/// allocation. This produces generally horrible, but correct, code.
///
/// OpInfo describes the operand.
-///
static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
const SDLoc &DL,
SDISelAsmOperandInfo &OpInfo) {
@@ -7013,6 +7105,8 @@ static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
return true;
}
+namespace {
+
class ExtraFlags {
unsigned Flags = 0;
@@ -7028,7 +7122,7 @@ public:
Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
}
- void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) {
+ void update(const TargetLowering::AsmOperandInfo &OpInfo) {
// Ideally, we would only check against memory constraints. However, the
// meaning of an Other constraint can be target-specific and we can't easily
// reason about it. Therefore, be conservative and set MayLoad/MayStore
@@ -7047,8 +7141,9 @@ public:
unsigned get() const { return Flags; }
};
+} // end anonymous namespace
+
/// visitInlineAsm - Handle a call to an InlineAsm object.
-///
void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
@@ -7207,13 +7302,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
RegsForValue RetValRegs;
// IndirectStoresToEmit - The set of stores to emit after the inline asm node.
- std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+ std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
switch (OpInfo.Type) {
- case InlineAsm::isOutput: {
+ case InlineAsm::isOutput:
if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
OpInfo.ConstraintType != TargetLowering::C_Register) {
// Memory output, or 'other' output (e.g. 'X' constraint).
@@ -7264,7 +7359,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
: InlineAsm::Kind_RegDef,
false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
break;
- }
+
case InlineAsm::isInput: {
SDValue InOperandVal = OpInfo.CallOperand;
@@ -7397,7 +7492,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
dl, DAG, AsmNodeOperands);
break;
}
- case InlineAsm::isClobber: {
+ case InlineAsm::isClobber:
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
@@ -7406,7 +7501,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands);
break;
}
- }
}
// Finish up input operands. Set the input chain and add the flag last.
@@ -7453,7 +7547,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
return;
}
- std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+ std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
// Process indirect outputs, first output all of the flagged copies out of
// physregs.
@@ -7865,13 +7959,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
switch (Intrinsic) {
case Intrinsic::experimental_vector_reduce_fadd:
- if (FMF.unsafeAlgebra())
+ if (FMF.isFast())
Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
break;
case Intrinsic::experimental_vector_reduce_fmul:
- if (FMF.unsafeAlgebra())
+ if (FMF.isFast())
Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
@@ -7903,14 +7997,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
case Intrinsic::experimental_vector_reduce_umin:
Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
break;
- case Intrinsic::experimental_vector_reduce_fmax: {
+ case Intrinsic::experimental_vector_reduce_fmax:
Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
break;
- }
- case Intrinsic::experimental_vector_reduce_fmin: {
+ case Intrinsic::experimental_vector_reduce_fmin:
Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
break;
- }
default:
llvm_unreachable("Unhandled vector reduce intrinsic");
}
@@ -7955,10 +8047,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
uint64_t Offset = OldOffsets[i];
MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
- unsigned RegisterVTSize = RegisterVT.getSizeInBits();
+ unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
RetTys.append(NumRegs, RegisterVT);
for (unsigned j = 0; j != NumRegs; ++j)
- Offsets.push_back(Offset + j * RegisterVTSize);
+ Offsets.push_back(Offset + j * RegisterVTByteSZ);
}
}
@@ -7996,6 +8088,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Entry.IsSwiftError = false;
Entry.Alignment = Align;
CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+ CLI.NumFixedArgs += 1;
CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
// sret demotion isn't compatible with tail-calls, since the sret argument
@@ -8148,8 +8241,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
}
getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
- CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
- true);
+ CLI.CS.getInstruction(), ExtendKind, true);
for (unsigned j = 0; j != NumParts; ++j) {
// if it isn't first piece, alignment must be 1
@@ -8209,7 +8301,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// The instruction result is the result of loading from the
// hidden sret parameter.
SmallVector<EVT, 1> PVTs;
- Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
+ Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -8326,9 +8418,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
return true;
}
-typedef DenseMap<const Argument *,
- std::pair<const AllocaInst *, const StoreInst *>>
- ArgCopyElisionMapTy;
+using ArgCopyElisionMapTy =
+ DenseMap<const Argument *,
+ std::pair<const AllocaInst *, const StoreInst *>>;
/// Scan the entry block of the function in FuncInfo for arguments that look
/// like copies into a local alloca. Record any copied arguments in
@@ -8503,7 +8595,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Put in an sret pointer parameter before all the other parameters.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
- PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ F.getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ ValueVTs);
// NOTE: Assuming that a pointer will never break down to more than one VT
// or one register.
@@ -8657,7 +8751,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// from the sret argument into it.
SmallVector<EVT, 1> ValueVTs;
ComputeValueVTs(*TLI, DAG.getDataLayout(),
- PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ F.getReturnType()->getPointerTo(
+ DAG.getDataLayout().getAllocaAddrSpace()),
+ ValueVTs);
MVT VT = ValueVTs[0].getSimpleVT();
MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
Optional<ISD::NodeType> AssertOp = None;
@@ -8749,11 +8845,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
SDB->setValue(&Arg, Res);
if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+ // We want to associate the argument with the frame index, among
+ // involved operands, that correspond to the lowest address. The
+ // getCopyFromParts function, called earlier, is swapping the order of
+ // the operands to BUILD_PAIR depending on endianness. The result of
+ // that swapping is that the least significant bits of the argument will
+ // be in the first operand of the BUILD_PAIR node, and the most
+ // significant bits will be in the second operand.
+ unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
if (LoadSDNode *LNode =
- dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+ dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
- FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+ FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
// Update the SwiftErrorVRegDefMap.
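
The comment block explains why operand 0 of the BUILD_PAIR is not always the
right operand to mine for a frame index: getCopyFromParts puts the least
significant bits in operand 0, but on big-endian targets the lowest memory
address holds the most significant bits. A minimal sketch of the operand
choice, mirroring the hunk:

// Operand 0 of BUILD_PAIR always holds the least significant bits of the
// reassembled value. On a big-endian target the lowest address holds the
// most significant bits, i.e. operand 1; on little-endian it is operand 0.
unsigned lowAddressOperand(bool IsBigEndian) { return IsBigEndian ? 1 : 0; }
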
@@ -8813,7 +8917,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
/// directly add them, because expansion might result in multiple MBB's for one
/// BB. As such, the start of the BB might correspond to a different MBB than
/// the end.
-///
void
SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
const TerminatorInst *TI = LLVMBB->getTerminator();
@@ -9249,10 +9352,12 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
BitTestInfo BTI;
std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
- // Sort by probability first, number of bits second.
+ // Sort by probability first, number of bits second, bit mask third.
if (a.ExtraProb != b.ExtraProb)
return a.ExtraProb > b.ExtraProb;
- return a.Bits > b.Bits;
+ if (a.Bits != b.Bits)
+ return a.Bits > b.Bits;
+ return a.Mask < b.Mask;
});
for (auto &CB : CBV) {
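
With the mask added as a third key, the comparator becomes a strict weak
ordering that distinguishes any two distinct cases, so std::sort yields the
same order on every host. A standalone model of the three-level comparison
(the struct mirrors CaseBits, with a double standing in for
BranchProbability; a sketch, not the patch):

#include <algorithm>
#include <cstdint>
#include <vector>

struct Case {
  double ExtraProb; // stand-in for BranchProbability
  unsigned Bits;
  uint64_t Mask;
};

int main() {
  std::vector<Case> CBV = {
      {0.5, 3, 0b0111}, {0.5, 3, 0b1011}, {0.5, 2, 0b0011}};
  std::sort(CBV.begin(), CBV.end(), [](const Case &a, const Case &b) {
    if (a.ExtraProb != b.ExtraProb)
      return a.ExtraProb > b.ExtraProb; // higher probability first
    if (a.Bits != b.Bits)
      return a.Bits > b.Bits;           // more bits second
    return a.Mask < b.Mask;             // distinct masks break all ties
  });
  return 0;
}
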
@@ -9441,10 +9546,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
if (TM.getOptLevel() != CodeGenOpt::None) {
- // Order cases by probability so the most likely case will be checked first.
+ // Here, we order cases by probability so the most likely case will be
+ // checked first. However, two clusters can have the same probability in
+ // which case their relative ordering is non-deterministic. So we use Low
+ // as a tie-breaker as clusters are guaranteed to never overlap.
std::sort(W.FirstCluster, W.LastCluster + 1,
[](const CaseCluster &a, const CaseCluster &b) {
- return a.Prob > b.Prob;
+ return a.Prob != b.Prob ?
+ a.Prob > b.Prob :
+ a.Low->getValue().slt(b.Low->getValue());
});
// Rearrange the case blocks so that the last one falls through if possible
@@ -9570,8 +9680,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
// The false probability is the sum of all unhandled cases.
- CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
- UnhandledProbs);
+ CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
+ getCurSDLoc(), I->Prob, UnhandledProbs);
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -9627,7 +9737,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
I++;
}
- for (;;) {
+ while (true) {
// Our binary search tree differs from a typical BST in that ours can have up
// to three values in each leaf. The pivot selection above doesn't take that
// into account, which means the tree might require more nodes and be less
@@ -9722,7 +9832,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
// Create the CaseBlock record that will be used to lower the branch.
CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
- LeftProb, RightProb);
+ getCurSDLoc(), LeftProb, RightProb);
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
@@ -9730,6 +9840,76 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
SwitchCases.push_back(CB);
}
+// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
+// from the switch statement.
+static BranchProbability scaleCaseProbality(BranchProbability CaseProb,
+ BranchProbability PeeledCaseProb) {
+ if (PeeledCaseProb == BranchProbability::getOne())
+ return BranchProbability::getZero();
+ BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+
+ uint32_t Numerator = CaseProb.getNumerator();
+ uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
+ return BranchProbability(Numerator, std::max(Numerator, Denominator));
+}
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return current MachineBasicBlock for the switch statement if the peeling
+// does not occur.
+// If the peeling is performed, return the newly created MachineBasicBlock
+// for the peeled switch statement. Also update Clusters to remove the peeled
+// case. PeeledCaseProb is the BranchProbability for the peeled case.
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+ const SwitchInst &SI, CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb) {
+ MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+  // Don't peel if there is only one cluster or when optimizing for size.
+ if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+ TM.getOptLevel() == CodeGenOpt::None ||
+ SwitchMBB->getParent()->getFunction().optForMinSize())
+ return SwitchMBB;
+
+ BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
+ unsigned PeeledCaseIndex = 0;
+ bool SwitchPeeled = false;
+ for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+ CaseCluster &CC = Clusters[Index];
+ if (CC.Prob < TopCaseProb)
+ continue;
+ TopCaseProb = CC.Prob;
+ PeeledCaseIndex = Index;
+ SwitchPeeled = true;
+ }
+ if (!SwitchPeeled)
+ return SwitchMBB;
+
+ DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb
+ << "\n");
+
+ // Record the MBB for the peeled switch statement.
+ MachineFunction::iterator BBI(SwitchMBB);
+ ++BBI;
+ MachineBasicBlock *PeeledSwitchMBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+ FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+ ExportFromCurrentBlock(SI.getCondition());
+ auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+ SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+ nullptr, nullptr, TopCaseProb.getCompl()};
+ lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+ Clusters.erase(PeeledCaseIt);
+ for (CaseCluster &CC : Clusters) {
+    DEBUG(dbgs() << "Scale the probability for one cluster, before scaling: "
+ << CC.Prob << "\n");
+ CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb);
+ DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+ }
+ PeeledCaseProb = TopCaseProb;
+ return PeeledSwitchMBB;
+}
+
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// Extract cases from the switch.
BranchProbabilityInfo *BPI = FuncInfo.BPI;
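
After a case with probability p is peeled off, scaleCaseProbality
renormalizes each remaining cluster by dividing by (1 - p), with the max()
clamp keeping the result at or below one. A standalone numeric check of that
rescaling, using doubles in place of BranchProbability (a sketch; the
70%/15% figures are illustrative):

#include <algorithm>
#include <cstdio>

// Rescale a case probability after a case with probability Peeled has been
// peeled off the switch, mirroring scaleCaseProbality above.
double scaleAfterPeel(double CaseProb, double Peeled) {
  if (Peeled >= 1.0)
    return 0.0;
  return std::min(1.0, CaseProb / (1.0 - Peeled));
}

int main() {
  // Peeling a 70% case leaves 30% of the distribution, so a 15% case
  // becomes 0.15 / 0.30 = 50% of the peeled switch.
  std::printf("%.2f\n", scaleAfterPeel(0.15, 0.70)); // prints 0.50
  return 0;
}
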
@@ -9783,9 +9963,15 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
}
+  // The branch probability of the peeled case.
+ BranchProbability PeeledCaseProb = BranchProbability::getZero();
+ MachineBasicBlock *PeeledSwitchMBB =
+ peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
// If there is only the default destination, jump there directly.
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
if (Clusters.empty()) {
+ assert(PeeledSwitchMBB == SwitchMBB);
SwitchMBB->addSuccessor(DefaultMBB);
if (DefaultMBB != NextBlock(SwitchMBB)) {
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
@@ -9817,8 +10003,14 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
SwitchWorkList WorkList;
CaseClusterIt First = Clusters.begin();
CaseClusterIt Last = Clusters.end() - 1;
- auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
- WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+ auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+  // Scale the branch probability for DefaultMBB if peeling occurs and
+ // DefaultMBB is not replaced.
+ if (PeeledCaseProb != BranchProbability::getZero() &&
+ DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+ DefaultProb = scaleCaseProbality(DefaultProb, PeeledCaseProb);
+ WorkList.push_back(
+ {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
while (!WorkList.empty()) {
SwitchWorkListItem W = WorkList.back();
@@ -9826,7 +10018,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
- !DefaultMBB->getParent()->getFunction()->optForMinSize()) {
+ !DefaultMBB->getParent()->getFunction().optForMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ac1d6aae65a52..9e7c2bc6821bf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===//
+//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,67 +16,75 @@
#include "StatepointLowering.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Statepoint.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <utility>
#include <vector>
namespace llvm {
-class AddrSpaceCastInst;
class AllocaInst;
+class AtomicCmpXchgInst;
+class AtomicRMWInst;
class BasicBlock;
-class BitCastInst;
class BranchInst;
class CallInst;
+class CatchPadInst;
+class CatchReturnInst;
+class CatchSwitchInst;
+class CleanupPadInst;
+class CleanupReturnInst;
+class Constant;
+class ConstantInt;
+class ConstrainedFPIntrinsic;
class DbgValueInst;
-class ExtractElementInst;
-class FCmpInst;
-class FPExtInst;
-class FPToSIInst;
-class FPToUIInst;
-class FPTruncInst;
-class Function;
+class DataLayout;
+class DIExpression;
+class DILocalVariable;
+class DILocation;
+class FenceInst;
class FunctionLoweringInfo;
-class GetElementPtrInst;
class GCFunctionInfo;
-class ICmpInst;
-class IntToPtrInst;
+class GCRelocateInst;
+class GCResultInst;
class IndirectBrInst;
class InvokeInst;
-class InsertElementInst;
-class Instruction;
+class LandingPadInst;
+class LLVMContext;
class LoadInst;
class MachineBasicBlock;
-class MachineInstr;
-class MachineRegisterInfo;
-class MDNode;
-class MVT;
class PHINode;
-class PtrToIntInst;
+class ResumeInst;
class ReturnInst;
class SDDbgValue;
-class SExtInst;
-class SelectInst;
-class ShuffleVectorInst;
-class SIToFPInst;
class StoreInst;
class SwitchInst;
-class DataLayout;
class TargetLibraryInfo;
-class TargetLowering;
-class TruncInst;
-class UIToFPInst;
-class UnreachableInst;
+class TargetMachine;
+class Type;
class VAArgInst;
-class ZExtInst;
+class UnreachableInst;
+class Use;
+class User;
+class Value;
//===----------------------------------------------------------------------===//
/// SelectionDAGBuilder - This is the common target-independent lowering
@@ -84,7 +92,7 @@ class ZExtInst;
///
class SelectionDAGBuilder {
/// CurInst - The current instruction being visited
- const Instruction *CurInst;
+ const Instruction *CurInst = nullptr;
DenseMap<const Value*, SDValue> NodeMap;
@@ -94,13 +102,15 @@ class SelectionDAGBuilder {
/// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
class DanglingDebugInfo {
- const DbgValueInst* DI;
+ const DbgValueInst* DI = nullptr;
DebugLoc dl;
- unsigned SDNodeOrder;
+ unsigned SDNodeOrder = 0;
+
public:
- DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { }
+ DanglingDebugInfo() = default;
DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
: DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
+
const DbgValueInst* getDI() { return DI; }
DebugLoc getdl() { return dl; }
unsigned getSDNodeOrder() { return SDNodeOrder; }
@@ -120,8 +130,8 @@ public:
/// State used while lowering a statepoint sequence (gc_statepoint,
/// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details.
StatepointLoweringState StatepointLowering;
-private:
+private:
/// PendingExports - CopyToReg nodes that copy values to virtual registers
/// for export to other blocks need to be emitted before any terminator
/// instruction, but they have no other ordering requirements. We bunch them
@@ -189,23 +199,22 @@ private:
}
};
- typedef std::vector<CaseCluster> CaseClusterVector;
- typedef CaseClusterVector::iterator CaseClusterIt;
+ using CaseClusterVector = std::vector<CaseCluster>;
+ using CaseClusterIt = CaseClusterVector::iterator;
struct CaseBits {
- uint64_t Mask;
- MachineBasicBlock* BB;
- unsigned Bits;
+ uint64_t Mask = 0;
+ MachineBasicBlock* BB = nullptr;
+ unsigned Bits = 0;
BranchProbability ExtraProb;
+ CaseBits() = default;
CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
BranchProbability Prob):
- Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { }
-
- CaseBits() : Mask(0), BB(nullptr), Bits(0) {}
+ Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
};
- typedef std::vector<CaseBits> CaseBitsVector;
+ using CaseBitsVector = std::vector<CaseBits>;
/// Sort Clusters and merge adjacent cases.
void sortAndRangeify(CaseClusterVector &Clusters);
@@ -214,15 +223,6 @@ private:
/// SelectionDAGBuilder and SDISel for the code generation of additional basic
/// blocks needed by multi-case switch statements.
struct CaseBlock {
- CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
- const Value *cmpmiddle, MachineBasicBlock *truebb,
- MachineBasicBlock *falsebb, MachineBasicBlock *me,
- BranchProbability trueprob = BranchProbability::getUnknown(),
- BranchProbability falseprob = BranchProbability::getUnknown())
- : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
- FalseProb(falseprob) {}
-
// CC - the condition code to use for the case block's setcc node
ISD::CondCode CC;
@@ -237,14 +237,25 @@ private:
// ThisBB - the block into which to emit the code for the setcc and branches
MachineBasicBlock *ThisBB;
+ /// The debug location of the instruction this CaseBlock was
+ /// produced from.
+ SDLoc DL;
+
// TrueProb/FalseProb - branch weights.
BranchProbability TrueProb, FalseProb;
+
+ CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+ const Value *cmpmiddle, MachineBasicBlock *truebb,
+ MachineBasicBlock *falsebb, MachineBasicBlock *me,
+ SDLoc dl,
+ BranchProbability trueprob = BranchProbability::getUnknown(),
+ BranchProbability falseprob = BranchProbability::getUnknown())
+ : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+ TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
+ TrueProb(trueprob), FalseProb(falseprob) {}
};
struct JumpTable {
- JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
- MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
-
/// Reg - the virtual register containing the index of the jump table entry
/// to jump to.
unsigned Reg;
@@ -255,39 +266,38 @@ private:
/// Default - the MBB of the default bb, which is a successor of the range
/// check MBB. This is when updating PHI nodes in successors.
MachineBasicBlock *Default;
+
+ JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+ MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
};
struct JumpTableHeader {
- JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
- bool E = false)
- : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
- Emitted(E) {}
APInt First;
APInt Last;
const Value *SValue;
MachineBasicBlock *HeaderBB;
bool Emitted;
+
+ JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+ bool E = false)
+ : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
+ Emitted(E) {}
};
- typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+ using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
struct BitTestCase {
- BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
- BranchProbability Prob):
- Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
uint64_t Mask;
MachineBasicBlock *ThisBB;
MachineBasicBlock *TargetBB;
BranchProbability ExtraProb;
+
+ BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+ BranchProbability Prob):
+ Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
};
- typedef SmallVector<BitTestCase, 3> BitTestInfo;
+ using BitTestInfo = SmallVector<BitTestCase, 3>;
struct BitTestBlock {
- BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
- bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
- BitTestInfo C, BranchProbability Pr)
- : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
- RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr) {}
APInt First;
APInt Range;
const Value *SValue;
@@ -300,6 +310,13 @@ private:
BitTestInfo Cases;
BranchProbability Prob;
BranchProbability DefaultProb;
+
+ BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+ bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+ BitTestInfo C, BranchProbability Pr)
+ : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
+ RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
+ Cases(std::move(C)), Prob(Pr) {}
};
/// Return the range of value in [First..Last].
@@ -336,7 +353,7 @@ private:
const ConstantInt *LT;
BranchProbability DefaultProb;
};
- typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
+ using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
/// Determine the rank by weight of CC in [First,Last]. If CC has more weight
/// than each cluster in the range, its rank is 0.
@@ -352,6 +369,10 @@ private:
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB);
+ /// Peel the top probability case if it exceeds the threshold
+ MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI,
+ CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb);
/// A class which encapsulates all of the information needed to generate a
/// stack protector check and signals to isel via its state being initialized
@@ -466,8 +487,7 @@ private:
/// the same function, use the same failure basic block).
class StackProtectorDescriptor {
public:
- StackProtectorDescriptor()
- : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {}
+ StackProtectorDescriptor() = default;
/// Returns true if all fields of the stack protector descriptor are
/// initialized, implying that we are ready to emit a stack protector.
@@ -533,15 +553,15 @@ private:
/// replace it with a compare/branch to the successor mbbs
/// SuccessMBB/FailureMBB depending on whether or not the stack protector
/// was violated.
- MachineBasicBlock *ParentMBB;
+ MachineBasicBlock *ParentMBB = nullptr;
/// A basic block visited on stack protector check success that contains the
/// terminators of ParentMBB.
- MachineBasicBlock *SuccessMBB;
+ MachineBasicBlock *SuccessMBB = nullptr;
/// A basic block visited on stack protector check failure that will
/// contain a call to __stack_chk_fail().
- MachineBasicBlock *FailureMBB;
+ MachineBasicBlock *FailureMBB = nullptr;
/// Add a successor machine basic block to ParentMBB. If the successor mbb
/// has not been created yet (i.e. if SuccMBB is null), then the machine basic
@@ -554,25 +574,29 @@ private:
private:
const TargetMachine &TM;
+
public:
/// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
/// nodes without a corresponding SDNode.
static const unsigned LowestSDNodeOrder = 1;
SelectionDAG &DAG;
- const DataLayout *DL;
- AliasAnalysis *AA;
+ const DataLayout *DL = nullptr;
+ AliasAnalysis *AA = nullptr;
const TargetLibraryInfo *LibInfo;
/// SwitchCases - Vector of CaseBlock structures used to communicate
/// SwitchInst code generation information.
std::vector<CaseBlock> SwitchCases;
+
/// JTCases - Vector of JumpTable structures used to communicate
/// SwitchInst code generation information.
std::vector<JumpTableBlock> JTCases;
+
/// BitTestCases - Vector of BitTestBlock structures used to communicate
/// SwitchInst code generation information.
std::vector<BitTestBlock> BitTestCases;
+
/// A StackProtectorDescriptor structure used to communicate stack protector
/// information in between SelectBasicBlock and FinishBasicBlock.
StackProtectorDescriptor SPDescriptor;
@@ -589,22 +613,19 @@ public:
GCFunctionInfo *GFI;
/// LPadToCallSiteMap - Map a landing pad to the call site indexes.
- DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+ DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
/// HasTailCall - This is set to true if a call in the current
/// block has been translated as a tail call. In this case,
/// no subsequent DAG nodes should be created.
- ///
- bool HasTailCall;
+ bool HasTailCall = false;
LLVMContext *Context;
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
CodeGenOpt::Level ol)
- : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
- DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo),
- HasTailCall(false) {
- }
+ : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+ FuncInfo(funcinfo) {}
void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
const TargetLibraryInfo *li);
@@ -653,6 +674,7 @@ public:
// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+
SDValue getValue(const Value *V);
bool findValue(const Value *V) const;
@@ -923,13 +945,12 @@ private:
void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
- /// EmitFuncArgumentDbgValue - If V is an function argument then create
- /// corresponding DBG_VALUE machine instruction for it now. At the end of
- /// instruction selection, they will be inserted to the entry BB.
+ /// If V is a function argument then create the corresponding DBG_VALUE machine
+ /// instruction for it now. At the end of instruction selection, they will be
+ /// inserted into the entry BB.
bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
DIExpression *Expr, DILocation *DL,
- int64_t Offset, bool IsDbgDeclare,
- const SDValue &N);
+ bool IsDbgDeclare, const SDValue &N);
/// Return the next block after MBB, or nullptr if there is none.
MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
@@ -940,8 +961,8 @@ private:
/// Return the appropriate SDDbgValue based on N.
SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
- DIExpression *Expr, int64_t Offset,
- const DebugLoc &dl, unsigned DbgSDNodeOrder);
+ DIExpression *Expr, const DebugLoc &dl,
+ unsigned DbgSDNodeOrder);
};
/// RegsForValue - This struct represents the registers (physical or virtual)
@@ -978,13 +999,11 @@ struct RegsForValue {
/// Records if this value needs to be treated in an ABI-dependent manner,
/// different to normal type legalization.
- bool IsABIMangled;
-
- RegsForValue();
+ bool IsABIMangled = false;
+ RegsForValue() = default;
RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
bool IsABIMangledValue = false);
-
RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
const DataLayout &DL, unsigned Reg, Type *Ty,
bool IsABIMangledValue = false);
@@ -1024,4 +1043,4 @@ struct RegsForValue {
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 3dd58975b1f10..dd30dc16378c4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,24 +11,42 @@
//
//===----------------------------------------------------------------------===//
-#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Printable.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdint>
+#include <iterator>
+
using namespace llvm;
static cl::opt<bool>
@@ -385,6 +403,7 @@ static Printable PrintNodeId(const SDNode &Node) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
+
LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
print(dbgs(), G);
dbgs() << '\n';
@@ -402,6 +421,36 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
}
void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+ if (getFlags().hasNoUnsignedWrap())
+ OS << " nuw";
+
+ if (getFlags().hasNoSignedWrap())
+ OS << " nsw";
+
+ if (getFlags().hasExact())
+ OS << " exact";
+
+ if (getFlags().hasUnsafeAlgebra())
+ OS << " unsafe";
+
+ if (getFlags().hasNoNaNs())
+ OS << " nnan";
+
+ if (getFlags().hasNoInfs())
+ OS << " ninf";
+
+ if (getFlags().hasNoSignedZeros())
+ OS << " nsz";
+
+ if (getFlags().hasAllowReciprocal())
+ OS << " arcp";
+
+ if (getFlags().hasAllowContract())
+ OS << " contract";
+
+ if (getFlags().hasVectorReduction())
+ OS << " vector-reduction";
+
if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) {
if (!MN->memoperands_empty()) {
OS << "<";
@@ -429,9 +478,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
OS << '<' << CSDN->getAPIntValue() << '>';
} else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle())
+ if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle())
OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble())
+ else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble())
OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
else {
OS << "<APFloat(";
@@ -479,7 +528,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- OS << ' ' << PrintReg(R->getReg(),
+ OS << ' ' << printReg(R->getReg(),
G ? G->getSubtarget().getRegisterInfo() : nullptr);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
@@ -640,7 +689,8 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet;
+using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>;
+
static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent,
const SelectionDAG *G, VisitedSDNodeSet &once) {
if (!once.insert(N).second) // If we've been here before, return now.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index bdf57e8058426..18f6997ef83c3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/SelectionDAGISel.h"
#include "ScheduleDAGSDNodes.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/APInt.h"
@@ -26,7 +27,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -45,9 +46,12 @@
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
@@ -80,13 +84,9 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
-#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <cassert>
@@ -212,7 +212,7 @@ namespace llvm {
IS.OptLevel = NewOptLevel;
IS.TM.setOptLevel(NewOptLevel);
DEBUG(dbgs() << "\nChanging optimization level for Function "
- << IS.MF->getFunction()->getName() << "\n");
+ << IS.MF->getFunction().getName() << "\n");
DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel
<< " ; After: -O" << NewOptLevel << "\n");
SavedFastISel = IS.TM.Options.EnableFastISel;
@@ -228,7 +228,7 @@ namespace llvm {
if (IS.OptLevel == SavedOptLevel)
return;
DEBUG(dbgs() << "\nRestoring optimization level for Function "
- << IS.MF->getFunction()->getName() << "\n");
+ << IS.MF->getFunction().getName() << "\n");
DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel
<< " ; After: -O" << SavedOptLevel << "\n");
IS.OptLevel = SavedOptLevel;
@@ -384,7 +384,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
assert((!EnableFastISelAbort || TM.Options.EnableFastISel) &&
"-fast-isel-abort > 0 requires -fast-isel");
- const Function &Fn = *mf.getFunction();
+ const Function &Fn = mf.getFunction();
MF = &mf;
// Reset the target options before resetting the optimization
@@ -414,7 +414,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI);
- CurDAG->init(*MF, *ORE);
+ CurDAG->init(*MF, *ORE, this);
FuncInfo->set(Fn, *MF, CurDAG);
// Now get the optional analyses if we want to.
@@ -494,10 +494,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DenseMap<unsigned, unsigned> LiveInMap;
if (!FuncInfo->ArgDbgValues.empty())
- for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
- E = RegInfo->livein_end(); LI != E; ++LI)
- if (LI->second)
- LiveInMap.insert(std::make_pair(LI->first, LI->second));
+ for (std::pair<unsigned, unsigned> LI : RegInfo->liveins())
+ if (LI.second)
+ LiveInMap.insert(LI);
// Insert DBG_VALUE instructions for function arguments to the entry block.
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
@@ -529,12 +528,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
const MDNode *Expr = MI->getDebugExpression();
DebugLoc DL = MI->getDebugLoc();
bool IsIndirect = MI->isIndirectDebugValue();
- unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+ if (IsIndirect)
+ assert(MI->getOperand(1).getImm() == 0 &&
+ "DBG_VALUE with nonzero offset");
assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
// Def is never a terminator here, so it is ok to increment InsertPos.
BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE),
- IsIndirect, LDI->second, Offset, Variable, Expr);
+ IsIndirect, LDI->second, Variable, Expr);
// If this vreg is directly copied into an exported register then
// that COPY instruction also needs a DBG_VALUE, if it is the only
@@ -556,7 +557,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// declared, rather than whatever is attached to CopyUseMI.
MachineInstr *NewMI =
BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
- CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr);
+ CopyUseMI->getOperand(0).getReg(), Variable, Expr);
MachineBasicBlock::iterator Pos = CopyUseMI;
EntryMBB->insertAfter(Pos, NewMI);
}
@@ -644,6 +645,9 @@ static void reportFastISelFailure(MachineFunction &MF,
void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
BasicBlock::const_iterator End,
bool &HadTailCall) {
+ // Allow creating illegal types during DAG building for the basic block.
+ CurDAG->NewNodesMustHaveLegalTypes = false;
+
// Lower the instructions. If a call is emitted as a tail call, cease emitting
// nodes for this block.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
@@ -726,8 +730,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
- DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB)
+ << " '" << BlockName << "'\n";
+ CurDAG->dump());
if (ViewDAGCombine1 && MatchFilterBB)
CurDAG->viewGraph("dag-combine1 input for " + BlockName);
@@ -739,8 +744,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized lowered selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
@@ -754,8 +761,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
Changed = CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
// Only allow creation of legal node types.
CurDAG->NewNodesMustHaveLegalTypes = true;
@@ -771,8 +780,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
}
{
@@ -782,8 +793,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
if (Changed) {
- DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
{
NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
@@ -791,8 +804,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->LegalizeTypes();
}
- DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Vector/type-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombineLT && MatchFilterBB)
CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
@@ -804,8 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#"
- << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized vector-legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
}
if (ViewLegalizeDAGs && MatchFilterBB)
@@ -817,8 +834,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Legalize();
}
- DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewDAGCombine2 && MatchFilterBB)
CurDAG->viewGraph("dag-combine2 input for " + BlockName);
@@ -830,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel);
}
- DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Optimized legalized selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (OptLevel != CodeGenOpt::None)
ComputeLiveOutVRegInfo();
@@ -847,8 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DoInstructionSelection();
}
- DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber
- << " '" << BlockName << "'\n"; CurDAG->dump());
+ DEBUG(dbgs() << "Selected selection DAG: "
+ << printMBBReference(*FuncInfo->MBB) << " '" << BlockName
+ << "'\n";
+ CurDAG->dump());
if (ViewSchedDAGs && MatchFilterBB)
CurDAG->viewGraph("scheduler input for " + BlockName);
@@ -915,9 +938,9 @@ public:
} // end anonymous namespace
void SelectionDAGISel::DoInstructionSelection() {
- DEBUG(dbgs() << "===== Instruction selection begins: BB#"
- << FuncInfo->MBB->getNumber()
- << " '" << FuncInfo->MBB->getName() << "'\n");
+ DEBUG(dbgs() << "===== Instruction selection begins: "
+ << printMBBReference(*FuncInfo->MBB) << " '"
+ << FuncInfo->MBB->getName() << "'\n");
PreprocessISelDAG();
@@ -1138,7 +1161,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
// Look through casts and constant offset GEPs. These mostly come from
// inalloca.
- APInt Offset(DL.getPointerSizeInBits(0), 0);
+ APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0);
Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset);
// Check if the variable is a static alloca or a byval or inalloca
@@ -1177,12 +1200,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
// For each machine basic block in reverse post order.
ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
- for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
- It = RPOT.begin(),
- E = RPOT.end();
- It != E; ++It) {
- MachineBasicBlock *MBB = *It;
-
+ for (MachineBasicBlock *MBB : RPOT) {
// For each swifterror value in the function.
for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
auto Key = std::make_pair(MBB, SwiftErrorVal);
@@ -1253,6 +1271,8 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
// If we don't need a phi create a copy to the upward exposed vreg.
if (!needPHI) {
assert(UpwardsUse);
+ assert(!VRegs.empty() &&
+ "No predecessors? Is the Calling Convention correct?");
unsigned DestReg = UUseVReg;
BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
DestReg)
@@ -1282,10 +1302,10 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
}
}
-void preassignSwiftErrorRegs(const TargetLowering *TLI,
- FunctionLoweringInfo *FuncInfo,
- BasicBlock::const_iterator Begin,
- BasicBlock::const_iterator End) {
+static void preassignSwiftErrorRegs(const TargetLowering *TLI,
+ FunctionLoweringInfo *FuncInfo,
+ BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End) {
if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
return;
@@ -2774,6 +2794,12 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Result = !::CheckType(Table, Index, N, SDISel.TLI,
SDISel.CurDAG->getDataLayout());
return Index;
+ case SelectionDAGISel::OPC_CheckTypeRes: {
+ unsigned Res = Table[Index++];
+ Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI,
+ SDISel.CurDAG->getDataLayout());
+ return Index;
+ }
case SelectionDAGISel::OPC_CheckChild0Type:
case SelectionDAGISel::OPC_CheckChild1Type:
case SelectionDAGISel::OPC_CheckChild2Type:
@@ -2906,6 +2932,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case ISD::CopyFromReg:
case ISD::CopyToReg:
case ISD::EH_LABEL:
+ case ISD::ANNOTATION_LABEL:
case ISD::LIFETIME_START:
case ISD::LIFETIME_END:
NodeToMatch->setNodeId(-1); // Mark selected.
@@ -3175,6 +3202,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
break;
continue;
+ case OPC_CheckTypeRes: {
+ unsigned Res = MatcherTable[MatcherIndex++];
+ if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI,
+ CurDAG->getDataLayout()))
+ break;
+ continue;
+ }
+
case OPC_SwitchOpcode: {
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
@@ -3548,6 +3583,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
"NodeToMatch was removed partway through selection");
SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
SDNode *E) {
+ CurDAG->salvageDebugInfo(*N);
auto &Chain = ChainNodesMatched;
assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
@@ -3725,6 +3761,25 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
}
}
+bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode");
+ auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!C)
+ return false;
+
+ // Detect when "or" is used to add an offset to a stack object.
+ if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ unsigned A = MFI.getObjectAlignment(FN->getIndex());
+ assert(isPowerOf2_32(A) && "Unexpected alignment");
+ int32_t Off = C->getSExtValue();
+ // If the alleged offset fits in the zero bits guaranteed by
+ // the alignment, then this or is really an add.
+ return (Off >= 0) && (((A - 1) & Off) == unsigned(Off));
+ }
+ return false;
+}
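Aside (not part of this patch): the check relies on a standard identity: if the base is A-aligned (A a power of two) and the offset fits in the low log2(A) bits, the OR cannot carry into the base bits, so it equals ADD. A standalone C++ check with hypothetical values:

    #include <cassert>
    #include <cstdint>
    int main() {
      const uint64_t Base = 0x1000; // 16-byte-aligned object address (hypothetical)
      const uint64_t Align = 16;
      for (uint64_t Off = 0; Off < Align; ++Off)
        assert((Base | Off) == (Base + Off)); // no carry into the aligned bits
    }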
+
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 11561dfa59474..be4ab094bf495 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -16,15 +16,13 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
#define DEBUG_TYPE "dag-printer"
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
index 55f70f7d9fd3f..3a283bc5fdc0c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===//
+//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+
using namespace llvm;
-SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {}
+SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default;
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 5d78bba86d73b..3f64b49e35556 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1,4 +1,4 @@
-//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===//
+//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,21 +14,44 @@
#include "StatepointLowering.h"
#include "SelectionDAGBuilder.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/Target/TargetLowering.h"
-#include <algorithm>
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "statepoint-lowering"
@@ -73,7 +96,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
NumSlotsAllocatedForStatepoints++;
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
- unsigned SpillSize = ValueType.getSizeInBits() / 8;
+ unsigned SpillSize = ValueType.getStoreSize();
assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
// First look for a previously created stack slot which is not in
@@ -200,7 +223,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val,
/// values on the stack between calls.
static void reservePreviousStackSlotForValue(const Value *IncomingValue,
SelectionDAGBuilder &Builder) {
-
SDValue Incoming = Builder.getValue(IncomingValue);
if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) {
@@ -292,7 +314,6 @@ removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases,
static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
SelectionDAGBuilder::StatepointLoweringInfo &SI,
SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) {
-
SDValue ReturnValue, CallEndVal;
std::tie(ReturnValue, CallEndVal) =
Builder.lowerInvokable(SI.CLI, SI.EHPadBB);
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
index b043184003a09..372c82a359f6b 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -1,4 +1,4 @@
-//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===//
+//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,11 +16,16 @@
#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include <cassert>
namespace llvm {
+
+class CallInst;
class SelectionDAGBuilder;
/// This class tracks both per-statepoint and per-selectiondag information.
@@ -30,7 +35,7 @@ class SelectionDAGBuilder;
/// works in concert with information in FunctionLoweringInfo.
class StatepointLoweringState {
public:
- StatepointLoweringState() : NextSlotToAllocate(0) {}
+ StatepointLoweringState() = default;
/// Reset all state tracking for a newly encountered safepoint. Also
/// performs some consistency checking.
@@ -69,7 +74,7 @@ public:
/// before the next statepoint. If we weren't expecting to see
/// it, we'll trigger an assertion.
void relocCallVisited(const CallInst &RelocCall) {
- auto I = find(PendingGCRelocateCalls, &RelocCall);
+ auto I = llvm::find(PendingGCRelocateCalls, &RelocCall);
assert(I != PendingGCRelocateCalls.end() &&
"Visited unexpected gcrelocate call");
PendingGCRelocateCalls.erase(I);
@@ -108,11 +113,12 @@ private:
SmallBitVector AllocatedStackSlots;
/// Points just beyond the last slot known to have been allocated
- unsigned NextSlotToAllocate;
+ unsigned NextSlotToAllocate = 0;
/// Keep track of pending gcrelocate calls for consistency check
SmallVector<const CallInst *, 10> PendingGCRelocateCalls;
};
+
} // end namespace llvm
#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8652df7bbd706..58276052c10bd 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetLowering.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
@@ -20,6 +20,9 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFile.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
@@ -29,10 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
#include <cctype>
using namespace llvm;
@@ -52,11 +52,11 @@ bool TargetLowering::isPositionIndependent() const {
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
SDValue &Chain) const {
- const Function *F = DAG.getMachineFunction().getFunction();
+ const Function &F = DAG.getMachineFunction().getFunction();
// Conservatively require the attributes of the call to match those of
// the return. Ignore noalias because it doesn't affect the call sequence.
- AttributeList CallerAttrs = F->getAttributes();
+ AttributeList CallerAttrs = F.getAttributes();
if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
.removeAttribute(Attribute::NoAlias)
.hasAttributes())
@@ -408,7 +408,7 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
// Search for the smallest integer type with free casts to and from
// Op's type. For expedience, just check power-of-2 integer types.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros();
+ unsigned DemandedSize = Demanded.getActiveBits();
unsigned SmallVTBits = DemandedSize;
if (!isPowerOf2_32(SmallVTBits))
SmallVTBits = NextPowerOf2(SmallVTBits);
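Aside (not part of this patch): getActiveBits() equals BitWidth minus the number of leading zeros, i.e. one past the index of the highest set bit, so the two formulations agree. A standalone check using a GCC/Clang builtin as the stand-in:

    #include <cassert>
    #include <cstdint>
    int main() {
      uint32_t Demanded = 0x00F0;                          // bits 4..7 demanded (hypothetical)
      unsigned ActiveBits = 32 - __builtin_clz(Demanded);  // matches getActiveBits()
      assert(ActiveBits == 8);
    }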
@@ -421,9 +421,8 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
Op.getOpcode(), dl, SmallVT,
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
- bool NeedZext = DemandedSize > SmallVTBits;
- SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND,
- dl, Op.getValueType(), X);
+ assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
+ SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
return TLO.CombineTo(Op, Z);
}
}
@@ -459,7 +458,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
// If Old has more than one use then it must be Op, because the
// AssumeSingleUse flag is not propagated to recursive calls of
// SimplifyDemanded bits, so the only node with multiple use that
- // it will attempt to combine will be opt.
+ // it will attempt to combine will be Op.
assert(TLO.Old == Op);
SmallVector <SDValue, 4> NewOps;
@@ -470,7 +469,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
}
NewOps.push_back(User->getOperand(i));
}
- TLO.DAG.UpdateNodeOperands(User, NewOps);
+ User = TLO.DAG.UpdateNodeOperands(User, NewOps);
// Op has less users now, so we may be able to perform additional combines
// with it.
DCI.AddToWorklist(Op.getNode());
@@ -480,7 +479,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
return true;
}
-bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask,
+bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -517,6 +516,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Don't know anything.
Known = KnownBits(BitWidth);
+ if (Op.getOpcode() == ISD::Constant) {
+ // We know all of the bits for a constant!
+ Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
+ Known.Zero = ~Known.One;
+ return false;
+ }
+
// Other users may use these bits.
if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
@@ -539,11 +545,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
- case ISD::Constant:
- // We know all of the bits for a constant!
- Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
- Known.Zero = ~Known.One;
- return false; // Don't fall through, will infinitely loop.
case ISD::BUILD_VECTOR:
// Collect the known bits that are shared by every constant vector element.
Known.Zero.setAllBits(); Known.One.setAllBits();
@@ -780,33 +781,38 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::SHL:
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
SDValue InOp = Op.getOperand(0);
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
+
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
- if (InOp.getOpcode() == ISD::SRL &&
- isa<ConstantSDNode>(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
- unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
- unsigned Opc = ISD::SHL;
- int Diff = ShAmt-C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SRL;
- }
+ if (InOp.getOpcode() == ISD::SRL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (SA2->getAPIntValue().ult(BitWidth)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SHL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SRL;
+ }
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0), NewSA));
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0),
+ NewSA));
+ }
+ }
}
}
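Aside (not part of this patch): this fold (and its SRL-of-SHL mirror below) replaces a shift pair by a single shift of the difference whenever the bits shifted out are never demanded. A standalone check with hypothetical constants C1 = 2 and ShAmt = 5, where the consumer ignores the low 5 result bits:

    #include <cassert>
    #include <cstdint>
    int main() {
      for (uint32_t X = 0; X < 4096; ++X) {
        uint32_t Paired = (X >> 2) << 5;  // (shl (srl X, 2), 5)
        uint32_t Single = X << 3;         // shl X, 5 - 2
        assert((Paired & ~0x1Fu) == (Single & ~0x1Fu)); // equal on the demanded bits
      }
    }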
@@ -818,7 +824,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
SDValue InnerOp = InOp.getOperand(0);
EVT InnerVT = InnerOp.getValueType();
- unsigned InnerBits = InnerVT.getSizeInBits();
+ unsigned InnerBits = InnerVT.getScalarSizeInBits();
if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
isTypeDesirableForOp(ISD::SHL, InnerVT)) {
EVT ShTy = getShiftAmountTy(InnerVT, DL);
@@ -837,45 +843,42 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// (shl (anyext x), c2-c1). This requires that the bottom c1 bits
// aren't demanded (as above) and that the shifted upper c1 bits of
// x aren't demanded.
- if (InOp.hasOneUse() &&
- InnerOp.getOpcode() == ISD::SRL &&
- InnerOp.hasOneUse() &&
- isa<ConstantSDNode>(InnerOp.getOperand(1))) {
- unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
- ->getZExtValue();
- if (InnerShAmt < ShAmt &&
- InnerShAmt < InnerBits &&
- NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
- NewMask.countTrailingZeros() >= ShAmt) {
- SDValue NewSA =
- TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
- Op.getOperand(1).getValueType());
- EVT VT = Op.getValueType();
- SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
- InnerOp.getOperand(0));
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
- NewExt, NewSA));
+ if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
+ InnerOp.hasOneUse()) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
+ unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
+ if (InnerShAmt < ShAmt &&
+ InnerShAmt < InnerBits &&
+ NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
+ NewMask.countTrailingZeros() >= ShAmt) {
+ SDValue NewSA =
+ TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
+ Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+ InnerOp.getOperand(0));
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+ NewExt, NewSA));
+ }
}
}
}
- Known.Zero <<= SA->getZExtValue();
- Known.One <<= SA->getZExtValue();
+ Known.Zero <<= ShAmt;
+ Known.One <<= ShAmt;
// low bits known zero.
- Known.Zero.setLowBits(SA->getZExtValue());
+ Known.Zero.setLowBits(ShAmt);
}
break;
case ISD::SRL:
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- EVT VT = Op.getValueType();
- unsigned ShAmt = SA->getZExtValue();
- unsigned VTSize = VT.getSizeInBits();
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
SDValue InOp = Op.getOperand(0);
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
APInt InDemandedMask = (NewMask << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -886,21 +889,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
- if (InOp.getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(InOp.getOperand(1))) {
- if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) {
- unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue();
- unsigned Opc = ISD::SRL;
- int Diff = ShAmt-C1;
- if (Diff < 0) {
- Diff = -Diff;
- Opc = ISD::SHL;
- }
+ if (InOp.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
+ if (ShAmt &&
+ (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (SA2->getAPIntValue().ult(BitWidth)) {
+ unsigned C1 = SA2->getZExtValue();
+ unsigned Opc = ISD::SRL;
+ int Diff = ShAmt-C1;
+ if (Diff < 0) {
+ Diff = -Diff;
+ Opc = ISD::SHL;
+ }
- SDValue NewSA =
- TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
- return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
- InOp.getOperand(0), NewSA));
+ SDValue NewSA =
+ TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
+ EVT VT = Op.getValueType();
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
+ InOp.getOperand(0),
+ NewSA));
+ }
+ }
}
}
@@ -924,14 +933,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
EVT VT = Op.getValueType();
- unsigned ShAmt = SA->getZExtValue();
// If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
+ if (SA->getAPIntValue().uge(BitWidth))
break;
+ unsigned ShAmt = SA->getZExtValue();
APInt InDemandedMask = (NewMask << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -979,15 +988,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
case ISD::SIGN_EXTEND_INREG: {
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ unsigned ExVTBits = ExVT.getScalarSizeInBits();
- APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
// If we only care about the highest bit, don't bother shifting right.
- if (MsbMask == NewMask) {
- unsigned ShAmt = ExVT.getScalarSizeInBits();
+ if (NewMask.isSignMask()) {
SDValue InOp = Op.getOperand(0);
- unsigned VTBits = Op->getValueType(0).getScalarSizeInBits();
bool AlreadySignExtended =
- TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
+ TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -997,7 +1004,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
- SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl,
+ SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
ShiftAmtTy);
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
Op.getValueType(), InOp,
@@ -1005,26 +1012,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
}
- // Sign extension. Compute the demanded bits in the result that are not
- // present in the input.
- APInt NewBits =
- APInt::getHighBitsSet(BitWidth,
- BitWidth - ExVT.getScalarSizeInBits());
-
// If none of the extended bits are demanded, eliminate the sextinreg.
- if ((NewBits & NewMask) == 0)
+ if (NewMask.getActiveBits() <= ExVTBits)
return TLO.CombineTo(Op, Op.getOperand(0));
- APInt InSignBit =
- APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth);
- APInt InputDemandedBits =
- APInt::getLowBitsSet(BitWidth,
- ExVT.getScalarSizeInBits()) &
- NewMask;
+ APInt InputDemandedBits = NewMask.getLoBits(ExVTBits);
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
- InputDemandedBits |= InSignBit;
+ InputDemandedBits.setBit(ExVTBits - 1);
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
Known, TLO, Depth+1))
@@ -1035,16 +1031,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// top bits of the result.
// If the input sign bit is known zero, convert this into a zero extension.
- if (Known.Zero.intersects(InSignBit))
+ if (Known.Zero[ExVTBits - 1])
return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
Op.getOperand(0), dl, ExVT.getScalarType()));
- if (Known.One.intersects(InSignBit)) { // Input sign bit known set
- Known.One |= NewBits;
- Known.Zero &= ~NewBits;
+ APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
+ if (Known.One[ExVTBits - 1]) { // Input sign bit known set
+ Known.One.setBitsFrom(ExVTBits);
+ Known.Zero &= Mask;
} else { // Input sign bit unknown
- Known.Zero &= ~NewBits;
- Known.One &= ~NewBits;
+ Known.Zero &= Mask;
+ Known.One &= Mask;
}
break;
}
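Aside (not part of this patch): when only the sign bit of the sext_inreg result is demanded, shifting the inner sign bit to the top reproduces it. A standalone check for an i8 extension in a 32-bit result (hypothetical widths):

    #include <cassert>
    #include <cstdint>
    int main() {
      for (uint32_t X = 0; X < 256; ++X) {
        uint32_t SExt = (uint32_t)(int32_t)(int8_t)X; // sext_inreg(X, i8)
        uint32_t Shl = X << 24;                       // shl X, 32 - 8
        assert((SExt >> 31) == (Shl >> 31));          // sign bits agree
      }
    }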
@@ -1072,61 +1069,47 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
case ISD::ZERO_EXTEND: {
unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
- APInt InMask = NewMask.trunc(OperandBitWidth);
// If none of the top bits are demanded, convert this into an any_extend.
- APInt NewBits =
- APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
- if (!NewBits.intersects(NewMask))
+ if (NewMask.getActiveBits() <= OperandBitWidth)
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
+ APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known = Known.zext(BitWidth);
- Known.Zero |= NewBits;
+ Known.Zero.setBitsFrom(OperandBitWidth);
break;
}
case ISD::SIGN_EXTEND: {
- EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarSizeInBits();
- APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
- APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1);
- APInt NewBits = ~InMask & NewMask;
+ unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits();
// If none of the top bits are demanded, convert this into an any_extend.
- if (NewBits == 0)
+ if (NewMask.getActiveBits() <= InBits)
return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
- APInt InDemandedBits = InMask & NewMask;
- InDemandedBits |= InSignBit;
- InDemandedBits = InDemandedBits.trunc(InBits);
+ APInt InDemandedBits = NewMask.trunc(InBits);
+ InDemandedBits.setBit(InBits - 1);
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
Depth+1))
return true;
- Known = Known.zext(BitWidth);
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ // If the sign bit is known one, the top bits match.
+ Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
- if (Known.Zero.intersects(InSignBit))
+ if (Known.isNonNegative())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
-
- // If the sign bit is known one, the top bits match.
- if (Known.One.intersects(InSignBit)) {
- Known.One |= NewBits;
- assert((Known.Zero & NewBits) == 0);
- } else { // Otherwise, top bits aren't known.
- assert((Known.One & NewBits) == 0);
- assert((Known.Zero & NewBits) == 0);
- }
break;
}
case ISD::ANY_EXTEND: {
@@ -1305,6 +1288,19 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.resetAll();
}
+void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
+ KnownBits &Known,
+ const APInt &DemandedElts,
+ const SelectionDAG &DAG,
+ unsigned Depth) const {
+ assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
+
+ if (unsigned Align = DAG.InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ Known.Zero.setLowBits(Log2_32(Align));
+ }
+}
+
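Aside (not part of this patch): an A-aligned address has log2(A) known-zero low bits, which is the fact setLowBits records here. A standalone check with a hypothetical 16-byte alignment:

    #include <cassert>
    #include <cstdint>
    int main() {
      const uint64_t Align = 16;                 // Log2_32(16) == 4
      for (uint64_t Addr = 0; Addr < 4096; Addr += Align)
        assert((Addr & (Align - 1)) == 0);       // low 4 bits are zero
    }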
/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
@@ -2967,7 +2963,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d,
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
SelectionDAG &DAG,
std::vector<SDNode *> *Created) const {
- AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes();
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
return SDValue(N,0); // Lower SDIV as SDIV
@@ -3436,8 +3432,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
// The type of data as saved in memory.
EVT MemSclVT = StVT.getScalarType();
- EVT PtrVT = BasePtr.getValueType();
-
// Store Stride in bytes
unsigned Stride = MemSclVT.getSizeInBits() / 8;
EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
@@ -3450,8 +3444,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
DAG.getConstant(Idx, SL, IdxVT));
- SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
- DAG.getConstant(Idx * Stride, SL, PtrVT));
+ SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
// This scalar TruncStore may be illegal, but we legalize it later.
SDValue Store = DAG.getTruncStore(
@@ -3474,6 +3467,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
EVT VT = LD->getValueType(0);
EVT LoadedVT = LD->getMemoryVT();
SDLoc dl(LD);
+ auto &MF = DAG.getMachineFunction();
+
if (VT.isFloatingPoint() || VT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
@@ -3498,13 +3493,13 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
// Copy the value to an (aligned) stack slot using (unaligned) integer
// loads and stores, then do an (aligned) load from the stack slot.
MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
- unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned LoadedBytes = LoadedVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
+ auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
SmallVector<SDValue, 8> Stores;
SDValue StackPtr = StackBase;
unsigned Offset = 0;
@@ -3523,13 +3518,14 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
LD->getAAInfo());
// Follow the load with a store to the stack slot. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- MachinePointerInfo()));
+ Stores.push_back(DAG.getStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
// Increment the pointers.
Offset += RegBytes;
- Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr,
- StackPtrIncrement);
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
}
// The last copy may be partial. Do an extending load.
@@ -3543,15 +3539,17 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
// Follow the load with a store to the stack slot. Remember the store.
// On big-endian machines this requires a truncating store to ensure
// that the bits end up in the right place.
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- MachinePointerInfo(), MemVT));
+ Stores.push_back(DAG.getTruncStore(
+ Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
// The order of the stores doesn't matter - say it with a TokenFactor.
SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
// Finally, perform the original load, only redirected to the stack slot.
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
- MachinePointerInfo(), LoadedVT);
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
+ LoadedVT);
// Callers expect a MERGE_VALUES node.
return std::make_pair(Load, TF);
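Aside (not part of this patch): a portable C++ analogue of the stack-slot strategy used above, copying the unaligned bytes into an aligned temporary and then reading the value from there (hypothetical helper name):

    #include <cstring>
    double loadUnalignedDouble(const unsigned char *P) {
      double Tmp;                         // the aligned "stack slot"
      std::memcpy(&Tmp, P, sizeof(Tmp));  // byte-wise copy is alignment-safe
      return Tmp;                         // ordinary aligned load from the slot
    }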
@@ -3581,8 +3579,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, MinAlign(Alignment, IncrementSize),
@@ -3591,8 +3589,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
LD->getAAInfo());
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
LD->getPointerInfo().getWithOffset(IncrementSize),
NewLoadedVT, MinAlign(Alignment, IncrementSize),
@@ -3621,6 +3619,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
+ auto &MF = DAG.getMachineFunction();
SDLoc dl(ST);
if (ST->getMemoryVT().isFloatingPoint() ||
@@ -3649,16 +3648,18 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
EVT::getIntegerVT(*DAG.getContext(),
StoredVT.getSizeInBits()));
EVT PtrVT = Ptr.getValueType();
- unsigned StoredBytes = StoredVT.getSizeInBits() / 8;
+ unsigned StoredBytes = StoredVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+ auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Perform the original store, only redirected to the stack slot.
- SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr,
- MachinePointerInfo(), StoredVT);
+ SDValue Store = DAG.getTruncStore(
+ Chain, dl, Val, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
EVT StackPtrVT = StackPtr.getValueType();
@@ -3670,8 +3671,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
// Do all but one of the copies using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
- SDValue Load =
- DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo());
+ SDValue Load = DAG.getLoad(
+ RegVT, dl, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
ST->getPointerInfo().getWithOffset(Offset),
@@ -3679,9 +3681,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
ST->getMemOperand()->getFlags()));
// Increment the pointers.
Offset += RegBytes;
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT,
- StackPtr, StackPtrIncrement);
- Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement);
+ StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
}
// The last store may be partial. Do a truncating store. On big-endian
@@ -3691,8 +3692,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
8 * (StoredBytes - Offset));
// Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
- MachinePointerInfo(), MemVT);
+ SDValue Load = DAG.getExtLoad(
+ ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
@@ -3726,9 +3728,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
ST->getMemOperand()->getFlags());
- EVT PtrVT = Ptr.getValueType();
- Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
- DAG.getConstant(IncrementSize, dl, PtrVT));
+ Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(
Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
@@ -3767,7 +3767,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
AddrVT);
Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
} else
- Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT);
+ Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}
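Aside (not part of this patch): the getStoreSize() changes in this file matter because getStoreSize() rounds the bit width up to whole bytes, while getSizeInBits() / 8 truncates. A standalone illustration:

    #include <cassert>
    int main() {
      unsigned Bits = 1;            // e.g. an i1 (hypothetical)
      assert(Bits / 8 == 0);        // the old computation would yield 0 bytes
      assert((Bits + 7) / 8 == 1);  // the store size is 1 byte
    }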
@@ -3797,7 +3797,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
SDValue Index) const {
SDLoc dl(Index);
// Make sure the index type is big enough to compute in.
- Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout()));
+ Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
EVT EltVT = VecVT.getVectorElementType();