author | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000
---|---|---
committer | Dimitry Andric <dim@FreeBSD.org> | 2017-12-18 20:10:56 +0000
commit | 044eb2f6afba375a914ac9d8024f8f5142bb912e (patch) |
tree | 1475247dc9f9fe5be155ebd4c9069c75aadf8c20 /lib/CodeGen/SelectionDAG |
parent | eb70dddbd77e120e5d490bd8fbe7ff3f8fa81c6b (diff) |
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
29 files changed, 3834 insertions, 2329 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index ae9c5adb03979..fd1e5e2cfc567 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -24,7 +24,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGTargetInfo.cpp StatepointLowering.cpp TargetLowering.cpp - + DEPENDS intrinsics_gen ) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 432c86dd6f1e1..f97732c1c49d0 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1,4 +1,4 @@ -//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===// // // The LLVM Compiler Infrastructure // @@ -16,32 +16,64 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/CodeGen/DAGCombine.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <cassert> +#include <cstdint> +#include <functional> +#include <iterator> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "dagcombine" @@ -53,43 +85,41 @@ STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); STATISTIC(SlicedLoads, "Number of load sliced"); -namespace { - static cl::opt<bool> - CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, - cl::desc("Enable DAG combiner's use of IR alias analysis")); +static cl::opt<bool> +CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, + 
cl::desc("Enable DAG combiner's use of IR alias analysis")); - static cl::opt<bool> - UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), - cl::desc("Enable DAG combiner's use of TBAA")); +static cl::opt<bool> +UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), + cl::desc("Enable DAG combiner's use of TBAA")); #ifndef NDEBUG - static cl::opt<std::string> - CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, - cl::desc("Only use DAG-combiner alias analysis in this" - " function")); +static cl::opt<std::string> +CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, + cl::desc("Only use DAG-combiner alias analysis in this" + " function")); #endif - /// Hidden option to stress test load slicing, i.e., when this option - /// is enabled, load slicing bypasses most of its profitability guards. - static cl::opt<bool> - StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, - cl::desc("Bypass the profitability model of load " - "slicing"), - cl::init(false)); +/// Hidden option to stress test load slicing, i.e., when this option +/// is enabled, load slicing bypasses most of its profitability guards. +static cl::opt<bool> +StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load slicing"), + cl::init(false)); - static cl::opt<bool> - MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), - cl::desc("DAG combiner may split indexing from loads")); +static cl::opt<bool> + MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), + cl::desc("DAG combiner may split indexing from loads")); -//------------------------------ DAGCombiner ---------------------------------// +namespace { class DAGCombiner { SelectionDAG &DAG; const TargetLowering &TLI; CombineLevel Level; CodeGenOpt::Level OptLevel; - bool LegalOperations; - bool LegalTypes; + bool LegalOperations = false; + bool LegalTypes = false; bool ForCodeSize; /// \brief Worklist of all of the nodes that need to be simplified. @@ -128,6 +158,19 @@ namespace { SDValue visit(SDNode *N); public: + DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), AA(AA) { + ForCodeSize = DAG.getMachineFunction().getFunction().optForSize(); + + MaximumLegalStoreInBits = 0; + for (MVT VT : MVT::all_valuetypes()) + if (EVT(VT).isSimple() && VT != MVT::Other && + TLI.isTypeLegal(EVT(VT)) && + VT.getSizeInBits() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits(); + } + /// Add to the worklist making sure its instance is at the back (next to be /// processed.) 
void AddToWorklist(SDNode *N) { @@ -285,7 +328,7 @@ namespace { SDValue visitSIGN_EXTEND(SDNode *N); SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); - SDValue visitAssertZext(SDNode *N); + SDValue visitAssertExt(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); @@ -348,6 +391,7 @@ namespace { SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); SDValue foldSelectOfConstants(SDNode *N); + SDValue foldVSelectOfConstants(SDNode *N); SDValue foldBinOpIntoSelect(SDNode *BO); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); @@ -371,6 +415,7 @@ namespace { SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue CombineExtLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); + SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); SDValue BuildSDIVPow2(SDNode *N); @@ -400,14 +445,11 @@ namespace { SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); - SDValue reduceBuildVecToTrunc(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx); SDValue matchVSelectOpSizesWithSetCC(SDNode *N); - SDValue GetDemandedBits(SDValue V, const APInt &Mask); - /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, @@ -434,12 +476,14 @@ namespace { /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. struct MemOpLink { - MemOpLink(LSBaseSDNode *N, int64_t Offset) - : MemNode(N), OffsetFromBase(Offset) {} // Ptr to the mem node. LSBaseSDNode *MemNode; + // Offset from the base ptr. int64_t OffsetFromBase; + + MemOpLink(LSBaseSDNode *N, int64_t Offset) + : MemNode(N), OffsetFromBase(Offset) {} }; /// This is a helper function for visitMUL to check the profitability @@ -450,38 +494,49 @@ namespace { SDValue &AddNode, SDValue &ConstNode); - /// This is a helper function for visitAND and visitZERO_EXTEND. Returns /// true if the (and (load x) c) pattern matches an extload. ExtVT returns - /// the type of the loaded value to be extended. LoadedVT returns the type - /// of the original loaded value. NarrowLoad returns whether the load would - /// need to be narrowed in order to match. + /// the type of the loaded value to be extended. bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, - EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, - bool &NarrowLoad); + EVT LoadResultTy, EVT &ExtVT); + + /// Helper function to calculate whether the given Load can have its + /// width reduced to ExtVT. + bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, + EVT &ExtVT, unsigned ShAmt = 0); + + /// Used by BackwardsPropagateMask to find suitable loads. + bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads, + SmallPtrSetImpl<SDNode*> &NodeWithConsts, + ConstantSDNode *Mask, SDNode *&UncombinedNode); + /// Attempt to propagate a given AND node back to load leaves so that they + /// can be combined into narrow loads. 
+ bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG); /// Helper function for MergeConsecutiveStores which merges the /// component store chains. SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); - /// This is a helper function for MergeConsecutiveStores. When the source - /// elements of the consecutive stores are all constants or all extracted - /// vector elements, try to merge them into one larger store. - /// \return True if a merged store was created. + /// This is a helper function for MergeConsecutiveStores. When the + /// source elements of the consecutive stores are all constants or + /// all extracted vector elements, try to merge them into one + /// larger store introducing bitcasts if necessary. \return True + /// if a merged store was created. bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc); - /// This is a helper function for MergeConsecutiveStores. - /// Stores that may be merged are placed in StoreNodes. + /// This is a helper function for MergeConsecutiveStores. Stores + /// that potentially may be merged with St are placed in + /// StoreNodes. void getStoreMergeCandidates(StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes); /// Helper function for MergeConsecutiveStores. Checks if - /// Candidate stores have indirect dependency through their - /// operands. \return True if safe to merge + /// candidate stores have indirect dependency through their + /// operands. \return True if safe to merge. bool checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); @@ -500,19 +555,6 @@ namespace { SDValue distributeTruncateThroughAnd(SDNode *N); public: - DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(AA) { - ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); - - MaximumLegalStoreInBits = 0; - for (MVT VT : MVT::all_valuetypes()) - if (EVT(VT).isSimple() && VT != MVT::Other && - TLI.isTypeLegal(EVT(VT)) && - VT.getSizeInBits() >= MaximumLegalStoreInBits) - MaximumLegalStoreInBits = VT.getSizeInBits(); - } - /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -541,14 +583,12 @@ namespace { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } }; -} - -namespace { /// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. 
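Aside on the MergeConsecutiveStores helpers documented above: the pattern they target, sketched as standalone C++ (illustrative only, not code from this commit), is a run of adjacent narrow constant stores that the combiner can replace with one wide store.

```cpp
#include <cstdint>
#include <cstring>

// Four adjacent byte stores of constants; merging consecutive stores lets
// the backend emit a single 32-bit store instead of four 8-bit stores.
void store_magic(uint8_t *p) {
  p[0] = 0x78;
  p[1] = 0x56;
  p[2] = 0x34;
  p[3] = 0x12;
}

// The merged form this corresponds to on a little-endian target.
void store_magic_wide(uint8_t *p) {
  uint32_t v = 0x12345678u;
  std::memcpy(p, &v, sizeof v);
}
```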
class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; + public: explicit WorklistRemover(DAGCombiner &dc) : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} @@ -557,7 +597,8 @@ public: DC.removeFromWorklist(N); } }; -} + +} // end anonymous namespace //===----------------------------------------------------------------------===// // TargetLowering::DAGCombinerInfo implementation @@ -577,7 +618,6 @@ CombineTo(SDNode *N, SDValue Res, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); } - SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); @@ -873,6 +913,56 @@ static bool isAnyConstantBuildVector(const SDNode *N) { ISD::isBuildVectorOfConstantFPSDNodes(N); } +// Attempt to match a unary predicate against a scalar/splat constant or +// every element of a constant BUILD_VECTOR. +static bool matchUnaryPredicate(SDValue Op, + std::function<bool(ConstantSDNode *)> Match) { + if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) + return Match(Cst); + + if (ISD::BUILD_VECTOR != Op.getOpcode()) + return false; + + EVT SVT = Op.getValueType().getScalarType(); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); + if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) + return false; + } + return true; +} + +// Attempt to match a binary predicate against a pair of scalar/splat constants +// or every element of a pair of constant BUILD_VECTORs. +static bool matchBinaryPredicate( + SDValue LHS, SDValue RHS, + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { + if (LHS.getValueType() != RHS.getValueType()) + return false; + + if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) + if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) + return Match(LHSCst, RHSCst); + + if (ISD::BUILD_VECTOR != LHS.getOpcode() || + ISD::BUILD_VECTOR != RHS.getOpcode()) + return false; + + EVT SVT = LHS.getValueType().getScalarType(); + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); + auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); + if (!LHSCst || !RHSCst) + return false; + if (LHSCst->getValueType(0) != SVT || + LHSCst->getValueType(0) != RHSCst->getValueType(0)) + return false; + if (!Match(LHSCst, RHSCst)) + return false; + } + return true; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); @@ -1123,10 +1213,10 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { Replace0 &= !N0->hasOneUse(); Replace1 &= (N0 != N1) && !N1->hasOneUse(); - // Combine Op here so it is presreved past replacements. + // Combine Op here so it is preserved past replacements. CombineTo(Op.getNode(), RV); - // If operands have a use ordering, make sur we deal with + // If operands have a use ordering, make sure we deal with // predecessor first. 
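The new matchUnaryPredicate/matchBinaryPredicate helpers above let a single lambda test either a lone constant or every lane of a constant BUILD_VECTOR. A minimal standalone analogue (plain C++, not the LLVM API) of how a fold consumes the unary form:

```cpp
#include <cstdint>
#include <functional>
#include <vector>

// Standalone analogue of matchUnaryPredicate: one predicate handles both a
// lone constant (a one-element vector here) and every lane of a vector.
static bool matchUnaryPred(const std::vector<uint64_t> &Elts,
                           const std::function<bool(uint64_t)> &Match) {
  if (Elts.empty())
    return false;
  for (uint64_t E : Elts)
    if (!Match(E))
      return false;
  return true;
}

int main() {
  // e.g. visitSHL's "shift amount too big" test over a v4i32 constant:
  unsigned OpSizeInBits = 32;
  std::vector<uint64_t> ShAmts = {32, 33, 40, 64};
  bool AllTooBig =
      matchUnaryPred(ShAmts, [&](uint64_t A) { return A >= OpSizeInBits; });
  return AllTooBig ? 0 : 1; // all lanes out of range -> fold to undef
}
```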
if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) { std::swap(N0, N1); @@ -1473,7 +1563,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); case ISD::ANY_EXTEND: return visitANY_EXTEND(N); - case ISD::AssertZext: return visitAssertZext(N); + case ISD::AssertSext: + case ISD::AssertZext: return visitAssertExt(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); @@ -1572,15 +1663,15 @@ SDValue DAGCombiner::combine(SDNode *N) { } } - // If N is a commutative binary node, try commuting it to enable more - // sdisel CSE. + // If N is a commutative binary node, try eliminate it if the commuted + // version is already present in the DAG. if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && N->getNumValues() == 1) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); // Constant operands are canonicalized to RHS. - if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { + if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) { SDValue Ops[] = {N1, N0}; SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, N->getFlags()); @@ -1632,7 +1723,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { // Check each of the operands. for (const SDValue &Op : TF->op_values()) { - switch (Op.getOpcode()) { case ISD::EntryToken: // Entry tokens don't need to be added to the list. They are @@ -1907,6 +1997,15 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not); } } + + // Undo the add -> or combine to merge constant offsets from a frame index. + if (N0.getOpcode() == ISD::OR && + isa<FrameIndexSDNode>(N0.getOperand(0)) && + isa<ConstantSDNode>(N0.getOperand(1)) && + DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { + SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); + } } if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -2064,7 +2163,8 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) } // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry) - if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) + if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) && + N1.getResNo() == 0) return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(), N0, N1.getOperand(0), N1.getOperand(2)); @@ -2537,6 +2637,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); + assert((!N0IsConst || + ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) && + "Splat APInt should be element width"); + assert((!N1IsConst || + ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && + "Splat APInt should be element width"); } else { N0IsConst = isa<ConstantSDNode>(N0); if (N0IsConst) { @@ -2562,12 +2668,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1.isNullValue()) return N1; - // We require a splat of the entire scalar bit width for non-contiguous - // bit patterns. 
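The "undo the add -> or combine" fold added to visitADD above is sound because OR and ADD agree whenever the operands share no set bits, which is what haveNoCommonBitsSet verifies. A self-contained check of that identity (illustrative, not from the patch):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // With no common set bits there are no carries, so (or x, c) == (add x, c)
  // and the constant offset can be reassociated onto the frame index.
  uint64_t x = 0xFFFF0000u, c = 0x0000FFFFu;
  assert((x & c) == 0);
  assert((x | c) == (x + c));
  return 0;
}
```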
- bool IsFullSplat = - ConstValue1.getBitWidth() == VT.getScalarSizeInBits(); // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1.isOneValue() && IsFullSplat) + if (N1IsConst && ConstValue1.isOneValue()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -2580,16 +2682,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getConstant(0, DL, VT), N0); } // fold (mul x, (1 << c)) -> x << c - if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && - IsFullSplat) { + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && + DAG.isKnownToBeAPowerOfTwo(N1) && + (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { SDLoc DL(N); - return DAG.getNode(ISD::SHL, DL, VT, N0, - DAG.getConstant(ConstValue1.logBase2(), DL, - getShiftAmountTy(N0.getValueType()))); + SDValue LogBase2 = BuildLogBase2(N1, DL); + AddToWorklist(LogBase2.getNode()); + + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && - IsFullSplat) { + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. a @@ -2835,7 +2941,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. Targets may check function attributes for size/speed // trade-offs. - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildSDIV(N)) return Op; @@ -2906,7 +3012,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { } // fold (udiv x, c) -> alternate - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) if (SDValue Op = BuildUDIV(N)) return Op; @@ -2965,7 +3071,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { } } - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. @@ -3003,19 +3109,26 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + if (VT.isVector()) { + // fold (mulhs x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N0; + } + // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) - if (isOneConstant(N1)) { - SDLoc DL(N); + if (isOneConstant(N1)) return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, DAG.getConstant(N0.getValueSizeInBits() - 1, DL, getShiftAmountTy(N0.getValueType()))); - } + // fold (mulhs x, undef) -> 0 if (N0.isUndef() || N1.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); + return DAG.getConstant(0, DL, VT); // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. 
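The rewritten visitMUL fold above now goes through BuildLogBase2, so splat-vector powers of two qualify as well as scalars. The underlying identity, checked as standalone C++ (a sketch, not patch code):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // mul by a power of two is a left shift: x * (1 << c) == x << c,
  // exactly, modulo 2^32 for unsigned 32-bit values.
  uint32_t x = 0xDEADBEEFu;
  for (unsigned c = 0; c < 32; ++c)
    assert(x * (1u << c) == (x << c));
  return 0;
}
```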
@@ -3043,6 +3156,14 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + if (VT.isVector()) { + // fold (mulhu x, 0) -> 0 + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N0; + } + // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -3216,7 +3337,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - // fold (add c1, c2) -> c1+c2 + // fold operation with constant operands. ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); if (N0C && N1C) @@ -3599,22 +3720,20 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { } bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, - EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, - bool &NarrowLoad) { - uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); - - if (ActiveBits == 0 || !AndC->getAPIntValue().isMask(ActiveBits)) + EVT LoadResultTy, EVT &ExtVT) { + if (!AndC->getAPIntValue().isMask()) return false; + unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes(); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); - LoadedVT = LoadN->getMemoryVT(); + EVT LoadedVT = LoadN->getMemoryVT(); if (ExtVT == LoadedVT && (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { // ZEXTLOAD will match without needing to change the size of the value being // loaded. - NarrowLoad = false; return true; } @@ -3634,10 +3753,185 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) return false; - NarrowLoad = true; return true; } +bool DAGCombiner::isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType, + EVT &ExtVT, unsigned ShAmt) { + // Don't transform one with multiple uses, this would require adding a new + // load. + if (!SDValue(LoadN, 0).hasOneUse()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ExtType, LoadN->getValueType(0), ExtVT)) + return false; + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!ExtVT.isRound()) + return false; + + // Don't change the width of a volatile load. + if (LoadN->isVolatile()) + return false; + + // Verify that we are actually reducing a load width here. + if (LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits()) + return false; + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. + if (LoadN->getNumValues() > 2) + return false; + + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (LoadN->getExtensionType() != ISD::NON_EXTLOAD && + LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return false; + + if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT)) + return false; + + // It's not possible to generate a constant of extended or untyped type. 
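isAndLoadExtLoad above treats a low-bit mask applied to a loaded value as a smaller zero-extending load. A source-level analogue of the transform, assuming a little-endian host (the patch adjusts the pointer offset for big-endian targets):

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

// (and (load i32 p), 0xFFFF) has the same value as (zextload i16 p), so
// the combiner can shrink the memory access itself.
uint32_t masked_load(const unsigned char *p) {
  uint32_t w;
  std::memcpy(&w, p, 4);
  return w & 0xFFFFu;
}

uint32_t narrow_load(const unsigned char *p) {
  uint16_t h;
  std::memcpy(&h, p, 2); // little-endian: the low half lives at offset 0
  return h;
}

int main() {
  unsigned char buf[4] = {0x78, 0x56, 0x34, 0x12};
  assert(masked_load(buf) == narrow_load(buf)); // holds on little-endian
  return 0;
}
```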
+ EVT PtrType = LoadN->getOperand(1).getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + return true; +} + +bool DAGCombiner::SearchForAndLoads(SDNode *N, + SmallPtrSetImpl<LoadSDNode*> &Loads, + SmallPtrSetImpl<SDNode*> &NodesWithConsts, + ConstantSDNode *Mask, + SDNode *&NodeToMask) { + // Recursively search for the operands, looking for loads which can be + // narrowed. + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.getValueType().isVector()) + return false; + + // Some constants may need fixing up later if they are too large. + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) && + (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue()) + NodesWithConsts.insert(N); + continue; + } + + if (!Op.hasOneUse()) + return false; + + switch(Op.getOpcode()) { + case ISD::LOAD: { + auto *Load = cast<LoadSDNode>(Op); + EVT ExtVT; + if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) && + isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) { + // Only add this load if we can make it more narrow. + if (ExtVT.bitsLT(Load->getMemoryVT())) + Loads.insert(Load); + continue; + } + return false; + } + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::AssertZext: { + unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT VT = Op.getOpcode() == ISD::AssertZext ? + cast<VTSDNode>(Op.getOperand(1))->getVT() : + Op.getOperand(0).getValueType(); + + // We can accept extending nodes if the mask is wider or an equal + // width to the original type. + if (ExtVT.bitsGE(VT)) + continue; + break; + } + case ISD::OR: + case ISD::XOR: + case ISD::AND: + if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask, + NodeToMask)) + return false; + continue; + } + + // Allow one node which will masked along with any loads found. + if (NodeToMask) + return false; + NodeToMask = Op.getNode(); + } + return true; +} + +bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { + auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!Mask) + return false; + + if (!Mask->getAPIntValue().isMask()) + return false; + + // No need to do anything if the and directly uses a load. + if (isa<LoadSDNode>(N->getOperand(0))) + return false; + + SmallPtrSet<LoadSDNode*, 8> Loads; + SmallPtrSet<SDNode*, 2> NodesWithConsts; + SDNode *FixupNode = nullptr; + if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) { + if (Loads.size() == 0) + return false; + + SDValue MaskOp = N->getOperand(1); + + // If it exists, fixup the single node we allow in the tree that needs + // masking. + if (FixupNode) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), + FixupNode->getValueType(0), + SDValue(FixupNode, 0), MaskOp); + DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), + MaskOp); + } + + // Narrow any constants that need it. + for (auto *LogicN : NodesWithConsts) { + auto *C = cast<ConstantSDNode>(LogicN->getOperand(1)); + SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0), + SDValue(C, 0), MaskOp); + DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And); + } + + // Create narrow loads. 
+ for (auto *Load : Loads) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), + SDValue(Load, 0), MaskOp); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp); + SDValue NewLoad = ReduceLoadWidth(And.getNode()); + assert(NewLoad && + "Shouldn't be masking the load if it can't be narrowed"); + CombineTo(Load, NewLoad, NewLoad.getValue(1)); + } + DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode()); + return true; + } + return false; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3829,55 +4123,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || (N0.getOpcode() == ISD::ANY_EXTEND && N0.getOperand(0).getOpcode() == ISD::LOAD))) { - bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; - LoadSDNode *LN0 = HasAnyExt - ? cast<LoadSDNode>(N0.getOperand(0)) - : cast<LoadSDNode>(N0); - if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { - auto NarrowLoad = false; - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; - EVT ExtVT, LoadedVT; - if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, - NarrowLoad)) { - if (!NarrowLoad) { - SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, - LN0->getChain(), LN0->getBasePtr(), ExtVT, - LN0->getMemOperand()); - AddToWorklist(N); - CombineTo(LN0, NewLoad, NewLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } else { - EVT PtrType = LN0->getOperand(1).getValueType(); - - unsigned Alignment = LN0->getAlignment(); - SDValue NewPtr = LN0->getBasePtr(); - - // For big endian targets, we need to add an offset to the pointer - // to load the correct bytes. For little endian systems, we merely - // need to read fewer bytes from the same pointer. - if (DAG.getDataLayout().isBigEndian()) { - unsigned LVTStoreBytes = LoadedVT.getStoreSize(); - unsigned EVTStoreBytes = ExtVT.getStoreSize(); - unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - SDLoc DL(LN0); - NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, - NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); - Alignment = MinAlign(Alignment, PtrOff); - } + if (SDValue Res = ReduceLoadWidth(N)) { + LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND + ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); - AddToWorklist(NewPtr.getNode()); + AddToWorklist(N); + CombineTo(LN0, Res, Res.getValue(1)); + return SDValue(N, 0); + } + } - SDValue Load = DAG.getExtLoad( - ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, - LN0->getPointerInfo(), ExtVT, Alignment, - LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); - AddToWorklist(N); - CombineTo(LN0, Load, Load.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } + if (Level >= AfterLegalizeTypes) { + // Attempt to propagate the AND back up to the leaves which, if they're + // loads, can be combined to narrow loads and the AND node can be removed. + // Perform after legalization so that extend nodes will already be + // combined into the loads. 
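BackwardsPropagateMask, completed above, pushes the AND mask through OR/XOR/AND operands down to the load leaves. That is sound because AND distributes over those operations, as this standalone check illustrates:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // A mask applied after a bitwise tree can instead be applied to each
  // leaf - e.g. to each load, which may then be narrowed.
  uint32_t a = 0x12345678u, b = 0x9ABCDEF0u, m = 0xFFu;
  assert(((a | b) & m) == ((a & m) | (b & m)));
  assert(((a ^ b) & m) == ((a & m) ^ (b & m)));
  assert(((a & b) & m) == ((a & m) & (b & m)));
  return 0;
}
```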
+ if (BackwardsPropagateMask(N, DAG)) { + return SDValue(N, 0); } } @@ -3974,7 +4236,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); - // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) + // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff) bool LookPassAnd0 = false; bool LookPassAnd1 = false; if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) @@ -4593,20 +4855,6 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, return nullptr; } -// if Left + Right == Sum (constant or constant splat vector) -static bool sumMatchConstant(SDValue Left, SDValue Right, unsigned Sum, - SelectionDAG &DAG, const SDLoc &DL) { - EVT ShiftVT = Left.getValueType(); - if (ShiftVT != Right.getValueType()) return false; - - SDValue ShiftSum = DAG.FoldConstantArithmetic(ISD::ADD, DL, ShiftVT, - Left.getNode(), Right.getNode()); - if (!ShiftSum) return false; - - ConstantSDNode *CSum = isConstOrConstSplat(ShiftSum); - return CSum && CSum->getZExtValue() == Sum; -} - // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. @@ -4620,6 +4868,16 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); if (!HasROTL && !HasROTR) return nullptr; + // Check for truncated rotate. + if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) { + assert(LHS.getValueType() == RHS.getValueType()); + if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) { + return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), + SDValue(Rot, 0)).getNode(); + } + } + // Match "(X shl/srl V1) & V2" where V2 may not be present. SDValue LHSShift; // The shift. SDValue LHSMask; // AND value if any. @@ -4652,7 +4910,11 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - if (sumMatchConstant(LHSShiftAmt, RHSShiftAmt, EltSizeInBits, DAG, DL)) { + auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; + }; + if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); @@ -4712,20 +4974,22 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { } namespace { + /// Represents known origin of an individual byte in load combine pattern. The /// value of the byte is either constant zero or comes from memory. struct ByteProvider { // For constant zero providers Load is set to nullptr. For memory providers // Load represents the node which loads the byte from memory. // ByteOffset is the offset of the byte in the value produced by the load. 
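MatchRotate above now checks shift-amount pairs through matchBinaryPredicate, so constant splat vectors qualify too. The rotate identity being matched, as a runnable sketch:

```cpp
#include <cassert>
#include <cstdint>

// (or (shl x, c), (srl x, w - c)) is a rotate-left by c when 0 < c < w;
// MatchRotateSum checks that the two amounts sum to the element width.
uint32_t rotl_by_shifts(uint32_t x, unsigned c) {
  return (x << c) | (x >> (32 - c)); // valid for c in (0, 32)
}

int main() {
  uint32_t x = 0x80000001u;
  assert(rotl_by_shifts(x, 1) == 0x00000003u);
  assert(rotl_by_shifts(x, 4) == 0x00000018u);
  return 0;
}
```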
- LoadSDNode *Load; - unsigned ByteOffset; + LoadSDNode *Load = nullptr; + unsigned ByteOffset = 0; - ByteProvider() : Load(nullptr), ByteOffset(0) {} + ByteProvider() = default; static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) { return ByteProvider(Load, ByteOffset); } + static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); } bool isConstantZero() const { return !Load; } @@ -4740,6 +5004,8 @@ private: : Load(Load), ByteOffset(ByteOffset) {} }; +} // end anonymous namespace + /// Recursively traverses the expression calculating the origin of the requested /// byte of the given value. Returns None if the provider can't be calculated. /// @@ -4751,9 +5017,9 @@ private: /// Because the parts of the expression are not allowed to have more than one /// use this function iterates over trees, not DAGs. So it never visits the same /// node more than once. -const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index, - unsigned Depth, - bool Root = false) { +static const Optional<ByteProvider> +calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, + bool Root = false) { // Typical i64 by i8 pattern requires recursion up to 8 calls depth if (Depth == 10) return None; @@ -4837,7 +5103,6 @@ const Optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index, return None; } -} // namespace /// Match a pattern where a wide type scalar value is loaded by several narrow /// loads and combined by shifts and ors. Fold it into a single load or a load @@ -4950,7 +5215,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { Loads.insert(L); } - assert(Loads.size() > 0 && "All the bytes of the value must be loaded from " + assert(!Loads.empty() && "All the bytes of the value must be loaded from " "memory, so there must be at least one load which produces the value"); assert(Base && "Base address of the accessed memory location must be set"); assert(FirstOffset != INT64_MAX && "First byte offset must be set"); @@ -5373,7 +5638,11 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (isNullConstantOrNullSplatConstant(N0)) return N0; // fold (shl x, c >= size(x)) -> undef - if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) + // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. 
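calculateByteProvider and MatchLoadCombine above fold a wide value assembled from adjacent byte loads, shifts, and ors into a single wide load (plus a byte swap where needed and legal). The canonical source pattern they recognize:

```cpp
#include <cstdint>

// On a little-endian target this whole expression can become one 32-bit
// load; on big-endian, one load plus a bswap when that is legal.
uint32_t read_le32(const uint8_t *p) {
  return (uint32_t)p[0]         |
         ((uint32_t)p[1] << 8)  |
         ((uint32_t)p[2] << 16) |
         ((uint32_t)p[3] << 24);
}
```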
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { + return Val->getAPIntValue().uge(OpSizeInBits); + }; + if (matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5400,20 +5669,29 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SHL) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - SDLoc DL(N); - APInt c1 = N0C1->getAPIntValue(); - APInt c2 = N1C->getAPIntValue(); + if (N0.getOpcode() == ISD::SHL) { + auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).uge(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + return DAG.getConstant(0, SDLoc(N), VT); - APInt Sum = c1 + c2; - if (Sum.uge(OpSizeInBits)) - return DAG.getConstant(0, DL, VT); - - return DAG.getNode( - ISD::SHL, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); + auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).ult(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + SDLoc DL(N); + EVT ShiftVT = N1.getValueType(); + SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum); } } @@ -5527,16 +5805,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { } // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2) // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. - if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && + N0.getNode()->hasOneUse() && isConstantOrConstantVector(N1, /* No Opaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) { SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); AddToWorklist(Shl0.getNode()); AddToWorklist(Shl1.getNode()); - return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) @@ -5579,7 +5859,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N0C && N1C && !N1C->isOpaque()) return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); // fold (sra x, c >= size(x)) -> undef - if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) + // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. 
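The visitSHL change above also distributes a shift over OR, not just ADD, when the inner operand has a constant RHS. Both rewrites are exact modulo the bit width, as this sketch checks:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0x01020304u, c1 = 0x55u, c2 = 7;
  // (op x, c1) << c2  -->  op (x << c2), (c1 << c2), so the constant can
  // be pre-shifted at compile time.
  assert(((x | c1) << c2) == ((x << c2) | (c1 << c2)));
  assert(((x + c1) << c2) == ((x << c2) + (c1 << c2)));
  return 0;
}
```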
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { + return Val->getAPIntValue().uge(OpSizeInBits); + }; + if (matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5603,20 +5887,31 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SRA) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - SDLoc DL(N); - APInt c1 = N0C1->getAPIntValue(); - APInt c2 = N1C->getAPIntValue(); - zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); - - APInt Sum = c1 + c2; - if (Sum.uge(OpSizeInBits)) - Sum = APInt(OpSizeInBits, OpSizeInBits - 1); + if (N0.getOpcode() == ISD::SRA) { + SDLoc DL(N); + EVT ShiftVT = N1.getValueType(); - return DAG.getNode( - ISD::SRA, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); + auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).uge(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT)); + + auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).ult(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum); } } @@ -5647,7 +5942,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDLoc DL(N); SDValue Amt = DAG.getConstant(ShiftAmt, DL, getShiftAmountTy(N0.getOperand(0).getValueType())); @@ -5697,7 +5991,6 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (N1C && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - // If the sign bit is known to be zero, switch this to a SRL. if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); @@ -5730,7 +6023,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (isNullConstantOrNullSplatConstant(N0)) return N0; // fold (srl x, c >= size(x)) -> undef - if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) + // NOTE: ALL vector elements must be too big to avoid partial UNDEFs. 
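The reworked sra-of-sra fold above clamps an out-of-range combined shift to width-1 rather than folding to undef, because arithmetic shifting saturates at replicating the sign bit. A standalone check (assuming the usual arithmetic right shift for signed values):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int32_t x = -1000;
  // In range: (sra (sra x, c1), c2) == (sra x, c1 + c2).
  assert(((x >> 3) >> 2) == (x >> 5));
  // Out-of-range sums saturate: shifting by width-1 already yields all
  // sign bits, so the fold emits (sra x, 31) instead of undef.
  assert(((x >> 20) >> 20) == (x >> 31));
  return 0;
}
```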
+ auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { + return Val->getAPIntValue().uge(OpSizeInBits); + }; + if (matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5745,20 +6042,29 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) - if (N1C && N0.getOpcode() == ISD::SRL) { - if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { - SDLoc DL(N); - APInt c1 = N0C1->getAPIntValue(); - APInt c2 = N1C->getAPIntValue(); + if (N0.getOpcode() == ISD::SRL) { + auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).uge(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + return DAG.getConstant(0, SDLoc(N), VT); - APInt Sum = c1 + c2; - if (Sum.uge(OpSizeInBits)) - return DAG.getConstant(0, DL, VT); - - return DAG.getNode( - ISD::SRL, DL, VT, N0.getOperand(0), - DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType())); + auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + APInt c1 = LHS->getAPIntValue(); + APInt c2 = RHS->getAPIntValue(); + zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); + return (c1 + c2).ult(OpSizeInBits); + }; + if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + SDLoc DL(N); + EVT ShiftVT = N1.getValueType(); + SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum); } } @@ -6008,7 +6314,6 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } - /// \brief Generate Min/Max node static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, @@ -6096,7 +6401,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // For any constants that differ by 1, we can transform the select into an // extend and add. Use a target hook because some targets may prefer to // transform in the other direction. - if (TLI.convertSelectOfConstantsToMath()) { + if (TLI.convertSelectOfConstantsToMath(VT)) { if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) { // select Cond, C1, C1-1 --> add (zext Cond), C1-1 if (VT != MVT::i1) @@ -6371,7 +6676,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { } SDValue DAGCombiner::visitMSCATTER(SDNode *N) { - if (Level >= AfterLegalizeTypes) return SDValue(); @@ -6432,7 +6736,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { } SDValue DAGCombiner::visitMSTORE(SDNode *N) { - if (Level >= AfterLegalizeTypes) return SDValue(); @@ -6447,7 +6750,6 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. min/max pattern matching on X86). if (Mask.getOpcode() == ISD::SETCC) { - // Check if any splitting is required. 
if (TLI.getTypeAction(*DAG.getContext(), VT) != TargetLowering::TypeSplitVector) @@ -6504,11 +6806,10 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { } SDValue DAGCombiner::visitMGATHER(SDNode *N) { - if (Level >= AfterLegalizeTypes) return SDValue(); - MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); + MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); SDValue Mask = MGT->getMask(); SDLoc DL(N); @@ -6581,7 +6882,6 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { } SDValue DAGCombiner::visitMLOAD(SDNode *N) { - if (Level >= AfterLegalizeTypes) return SDValue(); @@ -6593,7 +6893,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { // SETCC, then split both nodes and its operands before legalization. This // prevents the type legalizer from unrolling SETCC into scalar comparisons // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { EVT VT = N->getValueType(0); @@ -6665,6 +6964,57 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { return SDValue(); } +/// A vector select of 2 constant vectors can be simplified to math/logic to +/// avoid a variable select instruction and possibly avoid constant loads. +SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { + SDValue Cond = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 || + !TLI.convertSelectOfConstantsToMath(VT) || + !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || + !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) + return SDValue(); + + // Check if we can use the condition value to increment/decrement a single + // constant value. This simplifies a select to an add and removes a constant + // load/materialization from the general case. + bool AllAddOne = true; + bool AllSubOne = true; + unsigned Elts = VT.getVectorNumElements(); + for (unsigned i = 0; i != Elts; ++i) { + SDValue N1Elt = N1.getOperand(i); + SDValue N2Elt = N2.getOperand(i); + if (N1Elt.isUndef() || N2Elt.isUndef()) + continue; + + const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); + const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); + if (C1 != C2 + 1) + AllAddOne = false; + if (C1 != C2 - 1) + AllSubOne = false; + } + + // Further simplifications for the extra-special cases where the constants are + // all 0 or all -1 should be implemented as folds of these patterns. + SDLoc DL(N); + if (AllAddOne || AllSubOne) { + // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C + // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C + auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; + SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond); + return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); + } + + // The general case for select-of-constants: + // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 + // ...but that only makes sense if a vselect is slower than 2 logic ops, so + // leave that to a machine-specific pass. 
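foldVSelectOfConstants above rewrites a vector select between constants that differ by one into an extend plus add. The scalar identity behind the fold, with the i1 sign extension modeled explicitly (a sketch, not patch code):

```cpp
#include <cassert>

int main() {
  for (int cond = 0; cond <= 1; ++cond) {
    int C = 41;
    // vselect Cond, C+1, C --> add (zext Cond), C: a boolean zero-extends
    // to 0 or 1, so selecting adjacent constants is just an add.
    assert((cond ? C + 1 : C) == C + cond);
    // The C-1 case uses sign extension: a true i1 sign-extends to -1.
    int sext = cond ? -1 : 0;
    assert((cond ? C - 1 : C) == C + sext);
  }
  return 0;
}
```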
+ return SDValue(); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -6729,6 +7079,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { return CV; } + if (SDValue V = foldVSelectOfConstants(N)) + return V; + return SDValue(); } @@ -7243,8 +7596,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); + bool NoReplaceTruncAnd = !N0.hasOneUse(); bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); CombineTo(N, And); + // If N0 has multiple uses, change other uses as well. + if (NoReplaceTruncAnd) { + SDValue TruncAnd = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); + CombineTo(N0.getNode(), TruncAnd); + } if (NoReplaceTrunc) DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); else @@ -7307,7 +7667,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; - if (!VT.isVector()) { + if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { EVT SetCCVT = getSetCCResultType(N00VT); // Don't do this transform for i1 because there's a select transform // that would reverse it. @@ -7399,20 +7759,6 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); } - // fold (zext (truncate (load x))) -> (zext (smaller load x)) - // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) - if (N0.getOpcode() == ISD::TRUNCATE) { - if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { - SDNode *oye = N0.getOperand(0).getNode(); - if (NarrowLoad.getNode() != N0.getNode()) { - CombineTo(N0.getNode(), NarrowLoad); - // CombineTo deleted the truncate, if needed, but not what's under it. - AddToWorklist(oye); - } - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } - // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE) { // fold (zext (truncate (load x))) -> (zext (smaller load x)) @@ -7445,7 +7791,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); AddToWorklist(Op.getNode()); - return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + // We may safely transfer the debug info describing the truncate node over + // to the equivalent and operation. + DAG.transferDbgValues(N0, And); + return And; } } @@ -7522,11 +7872,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!N0.hasOneUse()) { if (N0.getOpcode() == ISD::AND) { auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); - auto NarrowLoad = false; EVT LoadResultTy = AndC->getValueType(0); - EVT ExtVT, LoadedVT; - if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, - NarrowLoad)) + EVT ExtVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT)) DoXform = false; } if (DoXform) @@ -7547,8 +7895,15 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); + bool NoReplaceTruncAnd = !N0.hasOneUse(); bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); CombineTo(N, And); + // If N0 has multiple uses, change other uses as well. 
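The zext(truncate x) fold touched above (which now also transfers debug values to the replacement node) rewrites the extend/truncate pair as an AND with the narrow type's mask:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // zext(trunc x) keeps the low bits and clears the rest, which is
  // exactly an AND with the narrow type's mask.
  uint64_t x = 0x1122334455667788ull;
  assert((uint64_t)(uint32_t)x == (x & 0xFFFFFFFFull));
  assert((uint64_t)(uint8_t)x == (x & 0xFFull));
  return 0;
}
```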
+ if (NoReplaceTruncAnd) { + SDValue TruncAnd = + DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); + CombineTo(N0.getNode(), TruncAnd); + } if (NoReplaceTrunc) DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); else @@ -7604,10 +7959,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend. - EVT MatchingElementType = EVT::getIntegerVT( - *DAG.getContext(), N00VT.getScalarSizeInBits()); - EVT MatchingVectorType = EVT::getVectorVT( - *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements()); + EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); @@ -7731,7 +8083,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { ISD::ANY_EXTEND); // If the load value is used only by N, replace it via CombineTo N. bool NoReplaceTrunc = N0.hasOneUse(); - CombineTo(N, ExtLoad); + CombineTo(N, ExtLoad); if (NoReplaceTrunc) DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); else @@ -7769,13 +8121,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // aext(setcc) -> aext(vsetcc) // Only do this before legalize for now. if (VT.isVector() && !LegalOperations) { - EVT N0VT = N0.getOperand(0).getValueType(); - // We know that the # elements of the results is the same as the - // # elements of the compare (and the # elements of the compare result - // for that matter). Check to see that they are the same size. If so, - // we know that the element size of the sext'd result matches the - // element size of the compare operands. - if (VT.getSizeInBits() == N0VT.getSizeInBits()) + EVT N00VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N00VT) == N0.getValueType()) + return SDValue(); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ if (VT.getSizeInBits() == N00VT.getSizeInBits()) return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -7783,7 +8138,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // elements we can use a matching integer vector type and then // truncate/any extend else { - EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); + EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), @@ -7804,77 +8159,47 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitAssertZext(SDNode *N) { +SDValue DAGCombiner::visitAssertExt(SDNode *N) { + unsigned Opcode = N->getOpcode(); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - EVT EVT = cast<VTSDNode>(N1)->getVT(); + EVT AssertVT = cast<VTSDNode>(N1)->getVT(); - // fold (assertzext (assertzext x, vt), vt) -> (assertzext x, vt) - if (N0.getOpcode() == ISD::AssertZext && - EVT == cast<VTSDNode>(N0.getOperand(1))->getVT()) + // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt) + if (N0.getOpcode() == Opcode && + AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT()) return N0; - return SDValue(); -} + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == Opcode) { + // We have an assert, truncate, assert sandwich. Make one stronger assert + // by asserting on the smallest asserted type to the larger source type. + // This eliminates the later assert: + // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN + // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN + SDValue BigA = N0.getOperand(0); + EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT(); + assert(BigA_AssertVT.bitsLE(N0.getValueType()) && + "Asserting zero/sign-extended bits to a type larger than the " + "truncated destination does not provide information"); -/// See if the specified operand can be simplified with the knowledge that only -/// the bits specified by Mask are used. If so, return the simpler operand, -/// otherwise return a null SDValue. -/// -/// (This exists alongside SimplifyDemandedBits because GetDemandedBits can -/// simplify nodes with multiple uses more aggressively.) -SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { - switch (V.getOpcode()) { - default: break; - case ISD::Constant: { - const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); - assert(CV && "Const value should be ConstSDNode."); - const APInt &CVal = CV->getAPIntValue(); - APInt NewVal = CVal & Mask; - if (NewVal != CVal) - return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); - break; + SDLoc DL(N); + EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT; + SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT); + SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(), + BigA.getOperand(0), MinAssertVTVal); + return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert); } - case ISD::OR: - case ISD::XOR: - // If the LHS or RHS don't contribute bits to the or, drop them. - if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) - return V.getOperand(1); - if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) - return V.getOperand(0); - break; - case ISD::SRL: - // Only look at single-use SRLs. 
-    if (!V.getNode()->hasOneUse())
-      break;
-    if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
-      // See if we can recursively simplify the LHS.
-      unsigned Amt = RHSC->getZExtValue();
-      // Watch out for shift count overflow though.
-      if (Amt >= Mask.getBitWidth()) break;
-      APInt NewMask = Mask << Amt;
-      if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
-        return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
-                           SimplifyLHS, V.getOperand(1));
-    }
-    break;
-  case ISD::AND: {
-    // X & -1 -> X (ignoring bits which aren't demanded).
-    ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
-    if (AndVal && (AndVal->getAPIntValue() & Mask) == Mask)
-      return V.getOperand(0);
-    break;
-  }
-  }
  return SDValue();
}

/// If the result of a wider load is shifted to the right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// of the narrower type, transform it to a narrower load from address + N / num of
-/// bits of new type. If the result is to be extended, also fold the extension
-/// to form a extending load.
+/// bits of new type. Also narrow the load if the result is masked with an AND
+/// to effectively produce a smaller type. If the result is to be extended,
+/// also fold the extension to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();
@@ -7893,28 +8218,40 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
-    // Another special-case: SRL is basically zero-extending a narrower value.
+    // Another special-case: SRL is basically zero-extending a narrower value,
+    // or it may be shifting a higher subword, half, or byte into the lowest
+    // bits.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
-    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
-    if (!N01) return SDValue();
-    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
-                              VT.getSizeInBits() - N01->getZExtValue());
-  }
-  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
-    return SDValue();
-  unsigned EVTBits = ExtVT.getSizeInBits();
+    auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
+    auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+    if (!N01 || !LN0)
+      return SDValue();
-  // Do not generate loads of non-round integer types since these can
-  // be expensive (and would be wrong if the type is not byte sized).
-  if (!ExtVT.isRound())
-    return SDValue();
+    uint64_t ShiftAmt = N01->getZExtValue();
+    uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
+    if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
+      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
+    else
+      ExtVT = EVT::getIntegerVT(*DAG.getContext(),
+                                VT.getSizeInBits() - ShiftAmt);
+  } else if (Opc == ISD::AND) {
+    // An AND with a constant mask is the same as a truncate + zero-extend.
+    auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+    if (!AndC || !AndC->getAPIntValue().isMask())
+      return SDValue();
+
+    unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
+    ExtType = ISD::ZEXTLOAD;
+    ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
+  }

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
+      unsigned EVTBits = ExtVT.getSizeInBits();
      // Is the shift amount a multiple of size of VT?
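// Illustrative sketch, not part of the patch: the narrowing performed by
// ReduceLoadWidth. Shifting a wide load right by a whole number of narrow
// elements and truncating equals loading the narrow value at a byte offset,
// and that offset flips with endianness, which is why the code adjusts
// ShAmt for big-endian targets. Standalone C++.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Buf[4] = {0x11, 0x22, 0x33, 0x44};
  uint32_t Wide;
  std::memcpy(&Wide, Buf, 4); // the original i32 load
  uint16_t Shifted = static_cast<uint16_t>(Wide >> 16); // (trunc (srl x, 16))

  uint32_t Probe = 1; // detect host endianness
  bool IsLittle = *reinterpret_cast<unsigned char *>(&Probe) == 1;
  uint16_t Narrow;
  // Bits [31:16] live at bytes 2..3 on little-endian, bytes 0..1 on big.
  std::memcpy(&Narrow, Buf + (IsLittle ? 2 : 0), 2); // the narrowed i16 load
  assert(Narrow == Shifted && "a narrow load replaces shift + truncate");
  return 0;
}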
if ((ShAmt & (EVTBits-1)) == 0) { N0 = N0.getOperand(0); @@ -7951,42 +8288,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { } } - // If we haven't found a load, we can't narrow it. Don't transform one with - // multiple uses, this would require adding a new load. - if (!isa<LoadSDNode>(N0) || !N0.hasOneUse()) + // If we haven't found a load, we can't narrow it. + if (!isa<LoadSDNode>(N0)) return SDValue(); - // Don't change the width of a volatile load. LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (LN0->isVolatile()) - return SDValue(); - - // Verify that we are actually reducing a load width here. - if (LN0->getMemoryVT().getSizeInBits() < EVTBits) - return SDValue(); - - // For the transform to be legal, the load must produce only two values - // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the - // transformation is not equivalent, and the downstream logic to replace - // uses gets things wrong. - if (LN0->getNumValues() > 2) - return SDValue(); - - // If the load that we're shrinking is an extload and we're not just - // discarding the extension we can't simply shrink the load. Bail. - // TODO: It would be possible to merge the extensions in some cases. - if (LN0->getExtensionType() != ISD::NON_EXTLOAD && - LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) - return SDValue(); - - if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) - return SDValue(); - - EVT PtrType = N0.getOperand(1).getValueType(); - - if (PtrType == MVT::Untyped || PtrType.isExtended()) - // It's not possible to generate a constant of extended or untyped type. + if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); // For big endian targets, we need to adjust the offset to the pointer to @@ -7997,6 +8304,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; } + EVT PtrType = N0.getOperand(1).getValueType(); uint64_t PtrOff = ShAmt / 8; unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); SDLoc DL(LN0); @@ -8130,10 +8438,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { } // fold (sext_inreg (extload x)) -> (sextload x) + // If sextload is not supported by target, we can only do the combine when + // load has one use. Doing otherwise can block folding the extload with other + // extends that the target does support. 
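// Illustrative sketch, not part of the patch: the "AND with a constant mask
// is the same as a truncate + zero-extend" equivalence behind the new
// ISD::AND case, which lets (and (load x), 255) be treated as a zextload i8.
// Standalone C++.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xDEADBEEF;
  uint32_t Masked = X & 0xFF; // AND with an isMask() constant: 8 trailing ones
  uint32_t ZextTrunc = static_cast<uint32_t>(static_cast<uint8_t>(X));
  assert(Masked == ZextTrunc && "mask width 8 means ExtVT = i8");
  return 0;
}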
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() && + N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -8208,12 +8520,18 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // noop truncate if (N0.getValueType() == N->getValueType(0)) return N0; - // fold (truncate c1) -> c1 - if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); + // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); + + // fold (truncate c1) -> c1 + if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { + SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); + if (C.getNode() != N) + return C; + } + // fold (truncate (ext x)) -> (ext x) or (truncate x) or x if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || @@ -8245,7 +8563,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // we need to be more careful about the vector instructions that we generate. if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { - EVT VecTy = N0.getOperand(0).getValueType(); EVT ExTy = N0.getValueType(); EVT TrTy = N->getValueType(0); @@ -8311,7 +8628,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && N0.getOperand(0).hasOneUse()) { - SDValue BuildVect = N0.getOperand(0); EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); EVT TruncVecEltTy = VT.getVectorElementType(); @@ -8340,9 +8656,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Currently we only perform this optimization on scalars because vectors // may have different active low bits. if (!VT.isVector()) { - if (SDValue Shorter = - GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), - VT.getSizeInBits()))) + APInt Mask = + APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); + if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } @@ -8413,7 +8729,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Fold truncate of a bitcast of a vector to an extract of the low vector // element. // - // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0 + // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue VecSrc = N0.getOperand(0); EVT SrcVT = VecSrc.getValueType(); @@ -8423,8 +8739,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDLoc SL(N); EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, - VecSrc, DAG.getConstant(0, SL, IdxVT)); + VecSrc, DAG.getConstant(Idx, SL, IdxVT)); } } @@ -8466,11 +8783,18 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + + // A BUILD_PAIR is always having the least significant part in elt 0 and the + // most significant part in elt 1. 
So when combining into one large load, we + // need to consider the endianness. + if (DAG.getDataLayout().isBigEndian()) + std::swap(LD1, LD2); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); EVT LD1VT = LD1->getValueType(0); - unsigned LD1Bytes = LD1VT.getSizeInBits() / 8; + unsigned LD1Bytes = LD1VT.getStoreSize(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { unsigned Align = LD1->getAlignment(); @@ -8751,12 +9075,15 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).getValueType() == VT) return SDValue(Op.getOperand(0)); - if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) return DAG.getBitcast(VT, Op); return SDValue(); }; + // FIXME: If either input vector is bitcast, try to convert the shuffle to + // the result type of this bitcast. This would eliminate at least one + // bitcast. See the transform in InstCombine. SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); if (!(SV0 && SV1)) @@ -8949,7 +9276,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); - bool LookThroughFPExt = TLI.isFPExtFree(VT); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. @@ -8979,28 +9305,31 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. - if (LookThroughFPExt) { - // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (isContractableFMUL(N00)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), N1); + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (isContractableFMUL(N00) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1); } + } - // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) - // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (isContractableFMUL(N10)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), N0); + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. 
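// Illustrative sketch, not part of the patch: why the fpext folds above are
// gated (isContractableFMUL plus the new isFPExtFoldable hook). The fused
// fma(fpext x, fpext y, z) rounds once, while fadd(fpext(fmul x, y), z)
// rounds the float product first, so the two can differ in the low bits.
// Standalone C++; assumes strict IEEE float evaluation.
#include <cmath>
#include <cstdio>

int main() {
  float x = 1.0f + std::ldexp(1.0f, -23); // 1 + one float ulp
  float y = x;
  double z = -1.0;
  double Unfused = static_cast<double>(x * y) + z; // float fmul rounds first
  double Fused =
      std::fma(static_cast<double>(x), static_cast<double>(y), z); // one rounding
  // The exact product is 1 + 2^-22 + 2^-46; the float multiply drops 2^-46.
  std::printf("unfused = %a\nfused   = %a\n", Unfused, Fused);
  return 0;
}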
+ if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (isContractableFMUL(N10) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0); } } @@ -9036,80 +9365,87 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N0)); } - if (LookThroughFPExt) { - // fold (fadd (fma x, y, (fpext (fmul u, v))), z) - // -> (fma x, y, (fma (fpext u), (fpext v), z)) - auto FoldFAddFMAFPExtFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); - }; - if (N0.getOpcode() == PreferredFusedOpcode) { - SDValue N02 = N0.getOperand(2); - if (N02.getOpcode() == ISD::FP_EXTEND) { - SDValue N020 = N02.getOperand(0); - if (isContractableFMUL(N020)) - return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), - N020.getOperand(0), N020.getOperand(1), - N1); + + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (isContractableFMUL(N020) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1); } } + } - // fold (fadd (fpext (fma x, y, (fmul u, v))), z) - // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. - auto FoldFAddFPExtFMAFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, X), - DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, U), - DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); - }; - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { - SDValue N002 = N00.getOperand(2); - if (isContractableFMUL(N002)) - return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), - N002.getOperand(0), N002.getOperand(1), - N1); + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (isContractableFMUL(N002) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1); } } + } - // fold (fadd x, (fma y, z, (fpext (fmul u, v))) - // -> (fma y, z, (fma (fpext u), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode) { - SDValue N12 = N1.getOperand(2); - if (N12.getOpcode() == ISD::FP_EXTEND) { - SDValue N120 = N12.getOperand(0); - if (isContractableFMUL(N120)) - return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), - N120.getOperand(0), N120.getOperand(1), - N0); + // fold (fadd x, (fma y, z, (fpext (fmul u, v))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (isContractableFMUL(N120) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0); } } + } - // fold (fadd x, (fpext (fma y, z, (fmul u, v))) - // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == PreferredFusedOpcode) { - SDValue N102 = N10.getOperand(2); - if (isContractableFMUL(N102)) - return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), - N102.getOperand(0), N102.getOperand(1), - N0); + // fold (fadd x, (fpext (fma y, z, (fmul u, v))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (isContractableFMUL(N102) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0); } } } @@ -9151,7 +9487,6 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); - bool LookThroughFPExt = TLI.isFPExtFree(VT); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. @@ -9187,79 +9522,83 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { } // Look through FP_EXTEND nodes to do more combining. 
- if (LookThroughFPExt) { - // fold (fsub (fpext (fmul x, y)), z) - // -> (fma (fpext x), (fpext y), (fneg z)) - if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (isContractableFMUL(N00)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, N1)); + + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (isContractableFMUL(N00) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); } + } - // fold (fsub x, (fpext (fmul y, z))) - // -> (fma (fneg (fpext y)), (fpext z), x) - // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FP_EXTEND) { - SDValue N10 = N1.getOperand(0); - if (isContractableFMUL(N10)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (isContractableFMUL(N10) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0); + } + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (isContractableFMUL(N000) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(0))), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), - N0); - } - - // fold (fsub (fpext (fneg (fmul, x, y))), z) - // -> (fneg (fma (fpext x), (fpext y), z)) - // Note: This could be removed with appropriate canonicalization of the - // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the - // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent - // from implementing the canonicalization in visitFSUB. 
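// Illustrative sketch, not part of the patch: the fsub folds above may hoist
// an fneg through an fpext. That reordering is exact, because negation only
// flips the sign bit and widening preserves it bit-for-bit. Standalone C++.
#include <cassert>

int main() {
  float x = 3.14159f;
  double A = static_cast<double>(-x); // fpext (fneg x)
  double B = -static_cast<double>(x); // fneg (fpext x)
  assert(A == B && "fneg commutes exactly with fpext");
  return 0;
}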
- if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FNEG) { - SDValue N000 = N00.getOperand(0); - if (isContractableFMUL(N000)) { - return DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(1)), - N1)); - } + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); } } + } - // fold (fsub (fneg (fpext (fmul, x, y))), z) - // -> (fneg (fma (fpext x)), (fpext y), z) - // Note: This could be removed with appropriate canonicalization of the - // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the - // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent - // from implementing the canonicalization in visitFSUB. - if (N0.getOpcode() == ISD::FNEG) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == ISD::FP_EXTEND) { - SDValue N000 = N00.getOperand(0); - if (isContractableFMUL(N000)) { - return DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N000.getOperand(1)), - N1)); - } + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fneg (fma (fpext x)), (fpext y), z) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (isContractableFMUL(N000) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); } } - } // More folding opportunities when target permits. @@ -9298,102 +9637,108 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N21, N0)); } - if (LookThroughFPExt) { - // fold (fsub (fma x, y, (fpext (fmul u, v))), z) - // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode) { - SDValue N02 = N0.getOperand(2); - if (N02.getOpcode() == ISD::FP_EXTEND) { - SDValue N020 = N02.getOperand(0); - if (isContractableFMUL(N020)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N020.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N020.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, - N1))); - } - } - // fold (fsub (fpext (fma x, y, (fmul u, v))), z) - // -> (fma (fpext x), (fpext y), - // (fma (fpext u), (fpext v), (fneg z))) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. 
- if (N0.getOpcode() == ISD::FP_EXTEND) { - SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { - SDValue N002 = N00.getOperand(2); - if (isContractableFMUL(N002)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), - DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N002.getOperand(0)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N002.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, - N1))); - } - } - - // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) - // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { - SDValue N120 = N1.getOperand(2).getOperand(0); - if (isContractableFMUL(N120)) { - SDValue N1200 = N120.getOperand(0); - SDValue N1201 = N120.getOperand(1); + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (isContractableFMUL(N020) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), - N1.getOperand(1), + N0.getOperand(0), N0.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, - VT, N1200)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N1201), - N0)); + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); } } + } - // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) - // -> (fma (fneg (fpext y)), (fpext z), - // (fma (fneg (fpext u)), (fpext v), x)) - // FIXME: This turns two single-precision and one double-precision - // operation into two double-precision operations, which might not be - // interesting for all targets, especially GPUs. - if (N1.getOpcode() == ISD::FP_EXTEND && - N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { - SDValue N100 = N1.getOperand(0).getOperand(0); - SDValue N101 = N1.getOperand(0).getOperand(1); - SDValue N102 = N1.getOperand(0).getOperand(2); - if (isContractableFMUL(N102)) { - SDValue N1020 = N102.getOperand(0); - SDValue N1021 = N102.getOperand(1); + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (isContractableFMUL(N002) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, VT, - N100)), - DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - DAG.getNode(ISD::FP_EXTEND, SL, - VT, N1020)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N1021), - N0)); + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); } } } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (isContractableFMUL(N120) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0)); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue CvtSrc = N1.getOperand(0); + SDValue N100 = CvtSrc.getOperand(0); + SDValue N101 = CvtSrc.getOperand(1); + SDValue N102 = CvtSrc.getOperand(2); + if (isContractableFMUL(N102) && + TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0)); + } + } } return SDValue(); @@ -9959,6 +10304,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // TODO: The FMA node should have flags that propagate to this node. return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg); } + + // fma (fneg x), K, y -> fma x -K, y + if (N0.getOpcode() == ISD::FNEG && + (TLI.isOperationLegal(ISD::ConstantFP, VT) || + (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) { + return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2); + } } if (Options.UnsafeFPMath) { @@ -10081,8 +10434,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { (!LegalOperations || // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM // backend)... we should handle this gracefully after Legalize. 
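// Illustrative sketch, not part of the patch: two scalar identities behind
// folds in this region. fma((-x), K, y) == fma(x, (-K), y) holds exactly
// because the real products agree, and trunc() is the identity on values
// that rint/trunc/floor/ceil already rounded, which is what the new ftrunc
// combine uses. Standalone C++.
#include <cassert>
#include <cmath>

int main() {
  double x = 2.75, k = 1.5, y = 0.25;
  // fma (fneg x), K, y -> fma x, -K, y
  assert(std::fma(-x, k, y) == std::fma(x, -k, y));

  // fold ftrunc (known rounded int x) -> x
  assert(std::trunc(std::floor(x)) == std::floor(x));
  assert(std::trunc(std::ceil(x)) == std::ceil(x));
  assert(std::trunc(std::nearbyint(x)) == std::nearbyint(x));
  return 0;
}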
- // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || - TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) || + TLI.isOperationLegal(ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) return DAG.getNode(ISD::FMUL, DL, VT, N0, DAG.getConstantFP(Recip, DL, VT), Flags); @@ -10264,7 +10617,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -10282,7 +10635,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && !VT.isVector() && (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), @@ -10296,7 +10649,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), @@ -10318,7 +10671,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, @@ -10333,10 +10686,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // The next optimizations are desirable only if SELECT_CC can be lowered. if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) - if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && (!LegalOperations || - TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { SDLoc DL(N); SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), @@ -10557,6 +10909,19 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + // fold ftrunc (known rounded int x) -> x + // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is + // likely to be generated to extract integer from a rounded floating value. + switch (N0.getOpcode()) { + default: break; + case ISD::FRINT: + case ISD::FTRUNC: + case ISD::FNEARBYINT: + case ISD::FFLOOR: + case ISD::FCEIL: + return N0; + } + return SDValue(); } @@ -11160,6 +11525,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 
1 : 0)); deleteAndRecombine(Ptr.getNode()); + AddToWorklist(Result.getNode()); return true; } @@ -11445,6 +11811,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } namespace { + /// \brief Helper structure used to slice a load in smaller loads. /// Basically a slice is obtained from the following sequence: /// Origin = load Ty1, Base @@ -11462,21 +11829,19 @@ struct LoadedSlice { struct Cost { /// Are we optimizing for code size. bool ForCodeSize; + /// Various cost. - unsigned Loads; - unsigned Truncates; - unsigned CrossRegisterBanksCopies; - unsigned ZExts; - unsigned Shift; + unsigned Loads = 0; + unsigned Truncates = 0; + unsigned CrossRegisterBanksCopies = 0; + unsigned ZExts = 0; + unsigned Shift = 0; - Cost(bool ForCodeSize = false) - : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), - CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} /// \brief Get the cost of one isolated slice. Cost(const LoadedSlice &LS, bool ForCodeSize = false) - : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), - CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); if (TruncType != LoadedType && @@ -11538,13 +11903,17 @@ struct LoadedSlice { bool operator>=(const Cost &RHS) const { return !(*this < RHS); } }; + // The last instruction that represent the slice. This should be a // truncate instruction. SDNode *Inst; + // The original load instruction. LoadSDNode *Origin; + // The right shift amount in bits from the original load. unsigned Shift; + // The DAG from which Origin came from. // This is used to get some contextual information about legal types, etc. SelectionDAG *DAG; @@ -11746,7 +12115,8 @@ struct LoadedSlice { return true; } }; -} + +} // end anonymous namespace /// \brief Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. @@ -11804,7 +12174,6 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, // Set the beginning of the pair. First = Second) { - Second = &LoadedSlices[CurrSlice]; // If First is NULL, it means we start a new pair. @@ -11935,7 +12304,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { // will be across several bytes. We do not support that. unsigned Width = User->getValueSizeInBits(0); if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) - return 0; + return false; // Build the slice for this chain of computations. LoadedSlice LS(User, LD, Shift, &DAG); @@ -12060,7 +12429,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { return Result; } - /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. @@ -12121,7 +12489,6 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, .getNode(); } - /// Look for sequence of load / op / store where op is one of 'or', 'xor', and /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try /// narrowing the load and store if it would end up being a win for performance @@ -12325,7 +12692,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, // Walk all the users of the constant with which we're multiplying. for (SDNode *Use : ConstNode->uses()) { - if (Use == MulNode) // This use is the one we're on right now. Skip it. 
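// Illustrative sketch, not part of the patch: the LoadedSlice::Cost cleanup
// above switches to C++11 in-class default initializers, so each constructor
// only names the members that differ from the defaults. Minimal standalone
// analogue with hypothetical members.
#include <cassert>

struct Cost {
  bool ForCodeSize;
  unsigned Loads = 0; // defaults replace a long member-initializer list
  unsigned Truncates = 0;
  unsigned Shift = 0;

  explicit Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
  // The "isolated slice" flavor starts with one load; the rest stay default.
  Cost(int /*SliceTag*/, bool ForCodeSize)
      : ForCodeSize(ForCodeSize), Loads(1) {}
};

int main() {
  Cost C(0, /*ForCodeSize=*/true);
  assert(C.Loads == 1 && C.Truncates == 0 && C.Shift == 0);
  return 0;
}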
      continue;
@@ -12376,6 +12742,12 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
  return false;
}

+static SDValue peekThroughBitcast(SDValue V) {
+  while (V.getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
+  return V;
+}
+
SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         unsigned NumStores) {
  SmallVector<SDValue, 8> Chains;
@@ -12403,56 +12775,93 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
  if (NumStores < 2)
    return false;

-  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
-
  // The latest Node in the DAG.
  SDLoc DL(StoreNodes[0].MemNode);
-  SDValue StoredVal;
+
+  int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
+  unsigned SizeInBits = NumStores * ElementSizeBits;
+  unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
+
+  EVT StoreTy;
  if (UseVector) {
-    bool IsVec = MemVT.isVector();
-    unsigned Elts = NumStores;
-    if (IsVec) {
-      // When merging vector stores, get the total number of elements.
-      Elts *= MemVT.getVectorNumElements();
-    }
+    unsigned Elts = NumStores * NumMemElts;
    // Get the type for the merged vector store.
-    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
-    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
+    StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
+  } else
+    StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
+  SDValue StoredVal;
+  if (UseVector) {
    if (IsConstantSrc) {
      SmallVector<SDValue, 8> BuildVector;
-      for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
+      for (unsigned I = 0; I != NumStores; ++I) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
        SDValue Val = St->getValue();
-        if (MemVT.getScalarType().isInteger())
-          if (auto *CFP = dyn_cast<ConstantFPSDNode>(St->getValue()))
-            Val = DAG.getConstant(
-                (uint32_t)CFP->getValueAPF().bitcastToAPInt().getZExtValue(),
-                SDLoc(CFP), MemVT);
+        // If the constant is of the wrong type, convert it now.
+        if (MemVT != Val.getValueType()) {
+          Val = peekThroughBitcast(Val);
+          // Deal with constants of the wrong size.
+          if (ElementSizeBits != Val.getValueSizeInBits()) {
+            EVT IntMemVT =
+                EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+            if (isa<ConstantFPSDNode>(Val)) {
+              // Not clear how to truncate FP values.
+              return false;
+            } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
+              Val = DAG.getConstant(C->getAPIntValue()
+                                        .zextOrTrunc(Val.getValueSizeInBits())
+                                        .zextOrTrunc(ElementSizeBits),
+                                    SDLoc(C), IntMemVT);
+          }
+          // Make sure the correctly sized value is bitcast to the right type.
+          Val = DAG.getBitcast(MemVT, Val);
+        }
        BuildVector.push_back(Val);
      }
-      StoredVal = DAG.getBuildVector(Ty, DL, BuildVector);
+      StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
+                                               : ISD::BUILD_VECTOR,
+                              DL, StoreTy, BuildVector);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
-        SDValue Val = St->getValue();
-        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
-        if (Val.getValueType() != MemVT)
-          return false;
+        SDValue Val = peekThroughBitcast(St->getValue());
+        // All operands of BUILD_VECTOR / CONCAT_VECTORS must be of
+        // type MemVT. If the underlying value is not the correct
+        // type, but it is an extraction of an appropriate vector, we
+        // can recast Val to be of the correct type. This may require
+        // converting between EXTRACT_VECTOR_ELT and
+        // EXTRACT_SUBVECTOR.
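// Illustrative sketch, not part of the patch: the shape of the new
// peekThroughBitcast helper on a toy node type. A bitcast only reinterprets
// bits, so store merging can look straight through any chain of them to the
// underlying value. Hypothetical standalone model, not the LLVM API.
#include <cassert>

struct Node {
  enum Kind { Bitcast, Leaf } K;
  Node *Operand;
};

static Node *peekThroughBitcast(Node *V) {
  while (V->K == Node::Bitcast) // strip any number of wrappers
    V = V->Operand;
  return V;
}

int main() {
  Node TheLeaf{Node::Leaf, nullptr};
  Node Cast1{Node::Bitcast, &TheLeaf};
  Node Cast2{Node::Bitcast, &Cast1};
  assert(peekThroughBitcast(&Cast2) == &TheLeaf);
  return 0;
}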
+ if ((MemVT != Val.getValueType()) && + (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) { + SDValue Vec = Val.getOperand(0); + EVT MemVTScalarTy = MemVT.getScalarType(); + // We may need to add a bitcast here to get types to line up. + if (MemVTScalarTy != Vec.getValueType()) { + unsigned Elts = Vec.getValueType().getSizeInBits() / + MemVTScalarTy.getSizeInBits(); + EVT NewVecTy = + EVT::getVectorVT(*DAG.getContext(), MemVTScalarTy, Elts); + Vec = DAG.getBitcast(NewVecTy, Vec); + } + auto OpC = (MemVT.isVector()) ? ISD::EXTRACT_SUBVECTOR + : ISD::EXTRACT_VECTOR_ELT; + Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Val.getOperand(1)); + } Ops.push_back(Val); } // Build the extracted vector elements back into a vector. - StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, - DL, Ty, Ops); } + StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS + : ISD::BUILD_VECTOR, + DL, StoreTy, Ops); + } } else { // We should always use a vector store when merging extracted vector // elements, so this path implies a store of constants. assert(IsConstantSrc && "Merged vector elements should use vector store"); - unsigned SizeInBits = NumStores * ElementSizeBytes * 8; APInt StoreInt(SizeInBits, 0); // Construct a single integer constant which is made of the smaller @@ -12463,18 +12872,25 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); - StoreInt <<= ElementSizeBytes * 8; + StoreInt <<= ElementSizeBits; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { - StoreInt |= C->getAPIntValue().zextOrTrunc(SizeInBits); + StoreInt |= C->getAPIntValue() + .zextOrTrunc(ElementSizeBits) + .zextOrTrunc(SizeInBits); } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { - StoreInt |= C->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits); + StoreInt |= C->getValueAPF() + .bitcastToAPInt() + .zextOrTrunc(ElementSizeBits) + .zextOrTrunc(SizeInBits); + // If fp truncation is necessary give up for now. + if (MemVT.getSizeInBits() != ElementSizeBits) + return false; } else { llvm_unreachable("Invalid constant element type"); } } // Create the new Load and Store operations. - EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); } @@ -12483,7 +12899,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. SDValue NewStore; - if (UseVector || !UseTrunc) { + if (!UseTrunc) { NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); @@ -12517,6 +12933,7 @@ void DAGCombiner::getStoreMergeCandidates( BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); EVT MemVT = St->getMemoryVT(); + SDValue Val = peekThroughBitcast(St->getValue()); // We must have a base and an offset. 
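// Illustrative sketch, not part of the patch: the shift-and-or loop that
// builds the single wide constant for merged constant stores, shown with
// plain integers. Each iteration shifts earlier elements toward the high
// bits; the surrounding code picks the iteration order (Idx) by endianness
// so the bytes land correctly in memory. Standalone C++.
#include <cassert>
#include <cstdint>

int main() {
  const uint8_t Elems[4] = {0x11, 0x22, 0x33, 0x44}; // four merged i8 stores
  uint32_t StoreInt = 0;
  for (uint8_t E : Elems) {
    StoreInt <<= 8; // make room for the next element
    StoreInt |= E;  // the zextOrTrunc'd element fills the low bits
  }
  assert(StoreInt == 0x11223344u && "first element ends up in the high bits");
  return 0;
}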
  if (!BasePtr.getBase().getNode())
    return;
@@ -12525,47 +12942,62 @@
  if (BasePtr.getBase().isUndef())
    return;

-  bool IsConstantSrc = isa<ConstantSDNode>(St->getValue()) ||
-                       isa<ConstantFPSDNode>(St->getValue());
-  bool IsExtractVecSrc =
-      (St->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
-       St->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR);
-  bool IsLoadSrc = isa<LoadSDNode>(St->getValue());
+  bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
+  bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
+                          Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+  bool IsLoadSrc = isa<LoadSDNode>(Val);
  BaseIndexOffset LBasePtr;
  // Match on loadbaseptr if relevant.
-  if (IsLoadSrc)
-    LBasePtr = BaseIndexOffset::match(
-        cast<LoadSDNode>(St->getValue())->getBasePtr(), DAG);
-
+  EVT LoadVT;
+  if (IsLoadSrc) {
+    auto *Ld = cast<LoadSDNode>(Val);
+    LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
+    LoadVT = Ld->getMemoryVT();
+    // Load and store should be the same type.
+    if (MemVT != LoadVT)
+      return;
+  }
  auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
                            int64_t &Offset) -> bool {
    if (Other->isVolatile() || Other->isIndexed())
      return false;
-    // We can merge constant floats to equivalent integers
-    if (Other->getMemoryVT() != MemVT)
-      if (!(MemVT.isInteger() && MemVT.bitsEq(Other->getMemoryVT()) &&
-            isa<ConstantFPSDNode>(Other->getValue())))
-        return false;
+    SDValue Val = peekThroughBitcast(Other->getValue());
+    // Allow merging constants of different types as integers.
+    bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
+                                           : Other->getMemoryVT() != MemVT;
    if (IsLoadSrc) {
+      if (NoTypeMatch)
+        return false;
      // The Load's Base Ptr must also match
-      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Other->getValue())) {
+      if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
        auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG);
+        if (LoadVT != OtherLd->getMemoryVT())
+          return false;
        if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
          return false;
      } else
        return false;
    }
-    if (IsConstantSrc)
-      if (!(isa<ConstantSDNode>(Other->getValue()) ||
-            isa<ConstantFPSDNode>(Other->getValue())))
+    if (IsConstantSrc) {
+      if (NoTypeMatch)
        return false;
-    if (IsExtractVecSrc)
-      if (!(Other->getValue().getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
-            Other->getValue().getOpcode() == ISD::EXTRACT_SUBVECTOR))
+      if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+        return false;
+    }
+    if (IsExtractVecSrc) {
+      // Do not merge truncated stores here.
+      if (Other->isTruncatingStore())
        return false;
+      if (!MemVT.bitsEq(Val.getValueType()))
+        return false;
+      if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+          Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+        return false;
+    }
    Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG);
    return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
  };
+
  // We are looking for a root node which is an ancestor to all mergeable
  // stores. We search up through a load, to our root and then down
  // through all children. For instance we will find Store{1,2,3} if
@@ -12612,10 +13044,8 @@ void DAGCombiner::getStoreMergeCandidates(
  // indirectly through its operand (we already consider dependencies
  // through the chain). Check in parallel by searching up from
  // non-chain operands of candidates.
- bool DAGCombiner::checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { - // FIXME: We should be able to truncate a full search of // predecessors by doing a BFS and keeping tabs the originating // stores from which worklist nodes come from in a similar way to @@ -12648,12 +13078,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { return false; EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) return false; - bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute( Attribute::NoImplicitFloat); // This function cannot currently deal with non-byte-sized memory sizes. @@ -12665,7 +13096,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Perform an early exit check. Do not bother looking at stored values that // are not constants, loads, or extracted vector elements. - SDValue StoredVal = St->getValue(); + SDValue StoredVal = peekThroughBitcast(St->getValue()); bool IsLoadSrc = isa<LoadSDNode>(StoredVal); bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || isa<ConstantFPSDNode>(StoredVal); @@ -12675,12 +13106,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) return false; - // Don't merge vectors into wider vectors if the source data comes from loads. - // TODO: This restriction can be lifted by using logic similar to the - // ExtractVecSrc case. - if (MemVT.isVector() && IsLoadSrc) - return false; - SmallVector<MemOpLink, 8> StoreNodes; // Find potential store merge candidates by searching through chain sub-DAG getStoreMergeCandidates(St, StoreNodes); @@ -12759,19 +13184,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned LastLegalVectorType = 1; bool LastIntegerTrunc = false; bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); SDValue StoredVal = ST->getValue(); - - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { - NonZero |= !C->isNullValue(); - } else if (ConstantFPSDNode *C = - dyn_cast<ConstantFPSDNode>(StoredVal)) { - NonZero |= !C->getConstantFPValue()->isNullValue(); - } else { - // Non-constant. - break; + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; } + NonZero |= !IsElementZero; // Find a legal type for the constant store. 
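// Illustrative sketch, not part of the patch: the candidate-skipping logic
// added around here. When no merge works from the front of the sequence,
// every candidate that could not change the outcome is dropped at once:
// alignment no better than the first store's, and no zero element after a
// nonzero one (FirstZeroAfterNonZero). Hypothetical standalone model.
#include <cassert>
#include <vector>

struct Candidate { unsigned Align; };

static unsigned skipCount(const std::vector<Candidate> &Stores,
                          unsigned FirstZeroAfterNonZero) {
  unsigned FirstAlign = Stores.front().Align;
  unsigned NumSkip = 1;
  while (NumSkip < Stores.size() && NumSkip < FirstZeroAfterNonZero &&
         Stores[NumSkip].Align <= FirstAlign) // alignment has not improved
    ++NumSkip;
  return NumSkip;
}

int main() {
  std::vector<Candidate> Stores = {{4}, {4}, {8}, {4}};
  // The third candidate is better aligned, so retry the merge from there.
  assert(skipCount(Stores, /*FirstZeroAfterNonZero=*/4) == 2);
  // A zero element right after a nonzero one stops the skipping early.
  assert(skipCount(Stores, /*FirstZeroAfterNonZero=*/1) == 1);
  return 0;
}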
unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; @@ -12791,8 +13217,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFast) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { LastIntegerTrunc = true; LastLegalType = i + 1; @@ -12806,13 +13232,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && !NoVectors) { // Find a legal type for the vector store. - unsigned Elts = i + 1; - if (MemVT.isVector()) { - // When merging vector stores, get the total number of elements. - Elts *= MemVT.getVectorNumElements(); - } + unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, FirstStoreAlign, &IsFast) && @@ -12821,23 +13243,34 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { } } + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + // Check if we found a legal integer type that creates a meaningful merge. - if (LastLegalType < 2 && LastLegalVectorType < 2) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) { + NumSkip++; + } + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); continue; } - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - bool Merged = MergeStoresOfConstantsOrVecElts( StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - if (!Merged) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - continue; - } + RV |= Merged; + // Remove merged stores for next iteration. - RV = true; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); continue; } @@ -12849,25 +13282,20 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { unsigned FirstStoreAS = FirstInChain->getAddressSpace(); unsigned FirstStoreAlign = FirstInChain->getAlignment(); unsigned NumStoresToMerge = 1; - bool IsVec = MemVT.isVector(); for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - unsigned StoreValOpcode = St->getValue().getOpcode(); + SDValue StVal = peekThroughBitcast(St->getValue()); // This restriction could be loosened. // Bail out if any stored values are not elements extracted from a // vector. 
It should be possible to handle mixed sources, but load // sources need more careful handling (see the block of code below that // handles consecutive loads). - if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && - StoreValOpcode != ISD::EXTRACT_SUBVECTOR) + if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT && + StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR) return RV; // Find a legal type for the vector store. - unsigned Elts = i + 1; - if (IsVec) { - // When merging vector stores, get the total number of elements. - Elts *= MemVT.getVectorNumElements(); - } + unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); bool IsFast; @@ -12879,6 +13307,23 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { NumStoresToMerge = i + 1; } + // Check if we found a legal integer type that creates a meaningful merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + continue; + } + bool Merged = MergeStoresOfConstantsOrVecElts( StoreNodes, MemVT, NumStoresToMerge, false, true, false); if (!Merged) { @@ -12905,7 +13350,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { BaseIndexOffset LdBasePtr; for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); + SDValue Val = peekThroughBitcast(St->getValue()); + LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val); if (!Ld) break; @@ -12917,10 +13363,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { if (Ld->isVolatile() || Ld->isIndexed()) break; - // We do not accept ext loads. - if (Ld->getExtensionType() != ISD::NON_EXTLOAD) - break; - // The stored memory type must be the same. if (Ld->getMemoryVT() != MemVT) break; @@ -12986,7 +13428,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { isDereferenceable = false; // Find a legal type for the vector store. 
- EVT StoreTy = EVT::getVectorVT(Context, MemVT, i + 1); + unsigned Elts = (i + 1) * NumMemElts; + EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + bool IsFastSt, IsFastLd; if (TLI.isTypeLegal(StoreTy) && TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && @@ -13023,8 +13467,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, - FirstStoreAS, FirstStoreAlign, &IsFastSt) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, FirstLoadAlign, &IsFastLd) && @@ -13047,7 +13491,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { NumElem = std::min(LastLegalType, NumElem); if (NumElem < 2) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); continue; } @@ -13055,7 +13511,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // to memory. EVT JointMemOpVT; if (UseVectorTy) { - JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); + // Find a legal type for the vector store. + unsigned Elts = NumElem * NumMemElts; + JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); } else { unsigned SizeInBits = NumElem * ElementSizeBytes * 8; JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); @@ -13104,12 +13562,17 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { SDValue(NewLoad.getNode(), 1)); } - // Replace the all stores with the new store. - for (unsigned i = 0; i < NumElem; ++i) + // Replace the all stores with the new store. Recursively remove + // corresponding value if its no longer used. + for (unsigned i = 0; i < NumElem; ++i) { + SDValue Val = StoreNodes[i].MemNode->getOperand(1); CombineTo(StoreNodes[i].MemNode, NewStore); + if (Val.getNode()->use_empty()) + recursivelyDeleteUnusedNodes(Val.getNode()); + } + RV = true; StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - continue; } return RV; } @@ -13284,7 +13747,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. 
For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = GetDemandedBits( + SDValue Shorter = DAG.GetDemandedBits( Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits())); AddToWorklist(Value.getNode()); @@ -13356,11 +13819,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { Ptr, ST->getMemoryVT(), ST->getMemOperand()); } - // Only perform this optimization before the types are legal, because we - // don't want to perform this optimization on every DAGCombine invocation. - if ((TLI.mergeStoresAfterLegalization()) ? Level == AfterLegalizeDAG - : !LegalTypes) { - for (;;) { + // Always perform this optimization before types are legal. If the target + // prefers, also try this after legalization to catch stores that were created + // by intrinsics or other nodes. + if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) { + while (true) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. @@ -13499,6 +13962,60 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { return St1; } +/// Convert a disguised subvector insertion into a shuffle: +/// insert_vector_elt V, (bitcast X from vector type), IdxC --> +/// bitcast(shuffle (bitcast V), (extended X), Mask) +/// Note: We do not use an insert_subvector node because that requires a legal +/// subvector type. +SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + SDValue InsertVal = N->getOperand(1); + if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || + !InsertVal.getOperand(0).getValueType().isVector()) + return SDValue(); + + SDValue SubVec = InsertVal.getOperand(0); + SDValue DestVec = N->getOperand(0); + EVT SubVecVT = SubVec.getValueType(); + EVT VT = DestVec.getValueType(); + unsigned NumSrcElts = SubVecVT.getVectorNumElements(); + unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); + unsigned NumMaskVals = ExtendRatio * NumSrcElts; + + // Step 1: Create a shuffle mask that implements this insert operation. The + // vector that we are inserting into will be operand 0 of the shuffle, so + // those elements are just 'i'. The inserted subvector is in the first + // positions of operand 1 of the shuffle. Example: + // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7} + SmallVector<int, 16> Mask(NumMaskVals); + for (unsigned i = 0; i != NumMaskVals; ++i) { + if (i / NumSrcElts == InsIndex) + Mask[i] = (i % NumSrcElts) + NumMaskVals; + else + Mask[i] = i; + } + + // Bail out if the target can not handle the shuffle we want to create. + EVT SubVecEltVT = SubVecVT.getVectorElementType(); + EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals); + if (!TLI.isShuffleMaskLegal(Mask, ShufVT)) + return SDValue(); + + // Step 2: Create a wide vector from the inserted source vector by appending + // undefined elements. This is the same size as our destination vector. + SDLoc DL(N); + SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT)); + ConcatOps[0] = SubVec; + SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps); + + // Step 3: Shuffle in the padded subvector. 
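The mask construction in steps 1-3 is easiest to see with the concrete numbers from the comment's own example. Below is a minimal standalone C++ sketch (plain std containers rather than the SelectionDAG types; buildInsertMask is a name made up for this illustration) that reproduces the mask for inserting a v2i16 X at element 2 of a v4i32 V viewed as eight i16 lanes:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    // Build the shuffle mask for inserting a NumSrcElts-element subvector at
    // element InsIndex of a destination that is ExtendRatio times as wide as
    // the subvector. Lanes of operand 0 (the destination) keep their index i;
    // the inserted lanes come from operand 1, whose elements start at index
    // NumMaskVals in shuffle-mask numbering.
    std::vector<int> buildInsertMask(unsigned NumSrcElts, unsigned ExtendRatio,
                                     unsigned InsIndex) {
      unsigned NumMaskVals = ExtendRatio * NumSrcElts;
      std::vector<int> Mask(NumMaskVals);
      for (unsigned i = 0; i != NumMaskVals; ++i)
        Mask[i] = (i / NumSrcElts == InsIndex)
                      ? (int)(i % NumSrcElts + NumMaskVals)
                      : (int)i;
      return Mask;
    }

    int main() {
      // insert v4i32 V, (v2i16 X), 2: the shuffle operates on 8 x i16 lanes.
      std::vector<int> Mask = buildInsertMask(2, 4, 2);
      const int Expected[8] = {0, 1, 2, 3, 8, 9, 6, 7};
      for (unsigned i = 0; i != 8; ++i) {
        assert(Mask[i] == Expected[i]);
        printf("%d ", Mask[i]);
      }
      printf("\n");
      return 0;
    }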
+ SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec); + SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask); + AddToWorklist(PaddedSubV.getNode()); + AddToWorklist(DestVecBC.getNode()); + AddToWorklist(Shuf.getNode()); + return DAG.getBitcast(VT, Shuf); +} + SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue InVec = N->getOperand(0); SDValue InVal = N->getOperand(1); @@ -13511,10 +14028,20 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { EVT VT = InVec.getValueType(); - // Check that we know which element is being inserted - if (!isa<ConstantSDNode>(EltNo)) + // Remove redundant insertions: + // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) + return InVec; + + // We must know which element is being inserted for folds below here. + auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); + if (!IndexC) return SDValue(); - unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + unsigned Elt = IndexC->getZExtValue(); + + if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) + return Shuf; // Canonicalize insert_vector_elt dag nodes. // Example: @@ -13692,9 +14219,11 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. } - // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x) + // extract_vector_elt (v2i32 (bitcast i64:x)), EltTrunc -> i32 (trunc i64:x) + bool isLE = DAG.getDataLayout().isLittleEndian(); + unsigned EltTrunc = isLE ? 0 : VT.getVectorNumElements() - 1; if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() && - ConstEltNo->isNullValue() && VT.isInteger()) { + ConstEltNo->getZExtValue() == EltTrunc && VT.isInteger()) { SDValue BCSrc = InVec.getOperand(0); if (BCSrc.getValueType().isScalarInteger()) return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc); @@ -13748,7 +14277,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // FIXME: We should handle recursing on other vector shuffles and // scalar_to_vector here as well. - if (!LegalOperations) { + if (!LegalOperations || + // FIXME: Should really be just isOperationLegalOrCustom. + TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VT) || + TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VT)) { EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); @@ -14054,10 +14586,18 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, EVT InVT1 = VecIn1.getValueType(); EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1; - unsigned Vec2Offset = InVT1.getVectorNumElements(); + unsigned Vec2Offset = 0; unsigned NumElems = VT.getVectorNumElements(); unsigned ShuffleNumElems = NumElems; + // In case both the input vectors are extracted from same base + // vector we do not need extra addend (Vec2Offset) while + // computing shuffle mask. + if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) || + !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) || + !(VecIn1.getOperand(0) == VecIn2.getOperand(0))) + Vec2Offset = InVT1.getVectorNumElements(); + // We can't generate a shuffle node with mismatched input and output types. // Try to make the types match the type of the output. 
if (InVT1 != VT || InVT2 != VT) { @@ -14072,7 +14612,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); VecIn2 = SDValue(); } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) { - if (!TLI.isExtractSubvectorCheap(VT, NumElems)) + if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems)) return SDValue(); if (!VecIn2.getNode()) { @@ -14204,7 +14744,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isa<ConstantSDNode>(Op.getOperand(1))) return SDValue(); - SDValue ExtractedFromVec = Op.getOperand(0); // All inputs must have the same element type as the output. @@ -14227,6 +14766,50 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { if (VecIn.size() < 2) return SDValue(); + // If all the Operands of BUILD_VECTOR extract from same + // vector, then split the vector efficiently based on the maximum + // vector access index and adjust the VectorMask and + // VecIn accordingly. + if (VecIn.size() == 2) { + unsigned MaxIndex = 0; + unsigned NearestPow2 = 0; + SDValue Vec = VecIn.back(); + EVT InVT = Vec.getValueType(); + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SmallVector<unsigned, 8> IndexVec(NumElems, 0); + + for (unsigned i = 0; i < NumElems; i++) { + if (VectorMask[i] <= 0) + continue; + unsigned Index = N->getOperand(i).getConstantOperandVal(1); + IndexVec[i] = Index; + MaxIndex = std::max(MaxIndex, Index); + } + + NearestPow2 = PowerOf2Ceil(MaxIndex); + if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 && + NumElems * 2 < NearestPow2) { + unsigned SplitSize = NearestPow2 / 2; + EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), SplitSize); + if (TLI.isTypeLegal(SplitVT)) { + SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, + DAG.getConstant(SplitSize, DL, IdxTy)); + SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, + DAG.getConstant(0, DL, IdxTy)); + VecIn.pop_back(); + VecIn.push_back(VecIn1); + VecIn.push_back(VecIn2); + + for (unsigned i = 0; i < NumElems; i++) { + if (VectorMask[i] <= 0) + continue; + VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2; + } + } + } + } + // TODO: We want to sort the vectors by descending length, so that adjacent // pairs have similar length, and the longer vector is always first in the // pair. @@ -14315,77 +14898,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask); } } - return Shuffles[0]; } -// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT -// operations which can be matched to a truncate. -SDValue DAGCombiner::reduceBuildVecToTrunc(SDNode *N) { - // TODO: Add support for big-endian. - if (DAG.getDataLayout().isBigEndian()) - return SDValue(); - if (N->getNumOperands() < 2) - return SDValue(); - SDLoc DL(N); - EVT VT = N->getValueType(0); - unsigned NumElems = N->getNumOperands(); - - if (!isTypeLegal(VT)) - return SDValue(); - - // If the input is something other than an EXTRACT_VECTOR_ELT with a constant - // index, bail out. - // TODO: Allow undef elements in some cases? 
- if (any_of(N->ops(), [VT](SDValue Op) { - return Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(Op.getOperand(1)) || - Op.getValueType() != VT.getVectorElementType(); - })) - return SDValue(); - - // Helper for obtaining an EXTRACT_VECTOR_ELT's constant index - auto GetExtractIdx = [](SDValue Extract) { - return cast<ConstantSDNode>(Extract.getOperand(1))->getSExtValue(); - }; - - // The first BUILD_VECTOR operand must be an an extract from index zero - // (assuming no undef and little-endian). - if (GetExtractIdx(N->getOperand(0)) != 0) - return SDValue(); - - // Compute the stride from the first index. - int Stride = GetExtractIdx(N->getOperand(1)); - SDValue ExtractedFromVec = N->getOperand(0).getOperand(0); - - // Proceed only if the stride and the types can be matched to a truncate. - if ((Stride == 1 || !isPowerOf2_32(Stride)) || - (ExtractedFromVec.getValueType().getVectorNumElements() != - Stride * NumElems) || - (VT.getScalarSizeInBits() * Stride > 64)) - return SDValue(); - - // Check remaining operands are consistent with the computed stride. - for (unsigned i = 1; i != NumElems; ++i) { - SDValue Op = N->getOperand(i); - - if ((Op.getOperand(0) != ExtractedFromVec) || - (GetExtractIdx(Op) != Stride * i)) - return SDValue(); - } - - // All checks were ok, construct the truncate. - LLVMContext &Ctx = *DAG.getContext(); - EVT NewVT = VT.getVectorVT( - Ctx, EVT::getIntegerVT(Ctx, VT.getScalarSizeInBits() * Stride), NumElems); - EVT TruncVT = - VT.isFloatingPoint() ? VT.changeVectorElementTypeToInteger() : VT; - - SDValue Res = DAG.getBitcast(NewVT, ExtractedFromVec); - Res = DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, Res); - return DAG.getBitcast(VT, Res); -} - SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -14428,10 +14943,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) return V; - if (TLI.isDesirableToCombineBuildVectorToTruncate()) - if (SDValue V = reduceBuildVecToTrunc(N)) - return V; - if (SDValue V = reduceBuildVecToShuffle(N)) return V; @@ -14514,8 +15025,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { for (SDValue Op : N->ops()) { // Peek through any bitcast. - while (Op.getOpcode() == ISD::BITCAST) - Op = Op.getOperand(0); + Op = peekThroughBitcast(Op); // UNDEF nodes convert to UNDEF shuffle mask values. if (Op.isUndef()) { @@ -14534,8 +15044,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { EVT ExtVT = ExtVec.getValueType(); // Peek through any bitcast. - while (ExtVec.getOpcode() == ISD::BITCAST) - ExtVec = ExtVec.getOperand(0); + ExtVec = peekThroughBitcast(ExtVec); // UNDEF nodes convert to UNDEF shuffle mask values. if (ExtVec.isUndef()) { @@ -14760,9 +15269,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // We are looking for an optionally bitcasted wide vector binary operator // feeding an extract subvector. - SDValue BinOp = Extract->getOperand(0); - if (BinOp.getOpcode() == ISD::BITCAST) - BinOp = BinOp.getOperand(0); + SDValue BinOp = peekThroughBitcast(Extract->getOperand(0)); // TODO: The motivating case for this transform is an x86 AVX1 target. That // target has temptingly almost legal versions of bitwise logic ops in 256-bit @@ -14786,13 +15293,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { return SDValue(); // Peek through bitcasts of the binary operator operands if needed. 
- SDValue LHS = BinOp.getOperand(0); - if (LHS.getOpcode() == ISD::BITCAST) - LHS = LHS.getOperand(0); - - SDValue RHS = BinOp.getOperand(1); - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); + SDValue LHS = peekThroughBitcast(BinOp.getOperand(0)); + SDValue RHS = peekThroughBitcast(BinOp.getOperand(1)); // We need at least one concatenation operation of a binop operand to make // this transform worthwhile. The concat must double the input vector sizes. @@ -14891,8 +15393,34 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { } // Skip bitcasting - if (V->getOpcode() == ISD::BITCAST) - V = V.getOperand(0); + V = peekThroughBitcast(V); + + // If the input is a build vector. Try to make a smaller build vector. + if (V->getOpcode() == ISD::BUILD_VECTOR) { + if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) { + EVT InVT = V->getValueType(0); + unsigned ExtractSize = NVT.getSizeInBits(); + unsigned EltSize = InVT.getScalarSizeInBits(); + // Only do this if we won't split any elements. + if (ExtractSize % EltSize == 0) { + unsigned NumElems = ExtractSize / EltSize; + EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), NumElems); + if ((!LegalOperations || + TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) && + (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { + unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / + EltSize; + + // Extract the pieces from the original build_vector. + SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), + makeArrayRef(V->op_begin() + IdxVal, + NumElems)); + return DAG.getBitcast(NVT, BuildVec); + } + } + } + } if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { // Handle only simple case where vector being inserted and vector @@ -15013,6 +15541,37 @@ static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); } +static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0, + SDValue N1, SelectionDAG &DAG) { + auto isUndefElt = [](SDValue V, int Idx) { + // TODO - handle more cases as required. + if (V.getOpcode() == ISD::BUILD_VECTOR) + return V.getOperand(Idx).isUndef(); + if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) + return (Idx != 0) || V.getOperand(0).isUndef(); + return false; + }; + + EVT VT = SVN->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + bool Changed = false; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) || + ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) { + Changed = true; + Idx = -1; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask); + + return SDValue(); +} + // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, // or turn a shuffle of a single concat into simpler shuffle then concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { @@ -15091,7 +15650,7 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { // // To deal with this, we currently use a bunch of mostly arbitrary heuristics. // We don't fold shuffles where one side is a non-zero constant, and we don't -// fold shuffles if the resulting BUILD_VECTOR would have duplicate +// fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate // non-constant operands. This seems to work out reasonably well in practice. 
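The duplicate-operand heuristic described above can be sketched independently of the DAG types. In the simplified shuffleOfScalars below (hypothetical names; the splat special case handled by the real code is omitted), applying the mask to two build_vector operand lists succeeds unless a non-constant operand would be duplicated:

    #include <cstdio>
    #include <optional>
    #include <set>
    #include <string>
    #include <vector>

    struct Scalar {
      std::string Name; // "undef", a constant such as "c42", or a plain value
      bool isUndef() const { return Name == "undef"; }
      bool isConstant() const { return !Name.empty() && Name[0] == 'c'; }
      bool operator<(const Scalar &O) const { return Name < O.Name; }
    };

    // Apply a shuffle mask to two build_vector operand lists, producing the
    // operand list of a single build_vector, or nothing if the fold would
    // duplicate a non-constant operand (the bail-out described above).
    std::optional<std::vector<Scalar>>
    shuffleOfScalars(const std::vector<Scalar> &N0,
                     const std::vector<Scalar> &N1,
                     const std::vector<int> &Mask) {
      std::vector<Scalar> Ops;
      std::set<Scalar> DuplicateOps;
      int NumElts = (int)N0.size();
      for (int M : Mask) {
        Scalar Op{"undef"};
        if (M >= 0)
          Op = M < NumElts ? N0[M] : N1[M - NumElts];
        if (!Op.isUndef() && !Op.isConstant() &&
            !DuplicateOps.insert(Op).second)
          return std::nullopt;
        Ops.push_back(Op);
      }
      return Ops;
    }

    int main() {
      std::vector<Scalar> A = {{"x"}, {"c1"}}, B = {{"c2"}, {"y"}};
      // Mask <0,3> takes x from A and y from B: the fold succeeds.
      if (auto R = shuffleOfScalars(A, B, {0, 3}))
        for (const Scalar &S : *R)
          printf("%s ", S.Name.c_str());
      printf("\n");
      // Mask <0,0> would duplicate the non-constant x: rejected.
      printf("%s\n", shuffleOfScalars(A, B, {0, 0}) ? "folded" : "rejected");
      return 0;
    }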
static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, @@ -15103,6 +15662,7 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, if (!N0->hasOneUse() || !N1->hasOneUse()) return SDValue(); + // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as // discussed above. if (!N1.isUndef()) { @@ -15114,6 +15674,15 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, return SDValue(); } + // If both inputs are splats of the same value then we can safely merge this + // to a single BUILD_VECTOR with undef elements based on the shuffle mask. + bool IsSplat = false; + auto *BV0 = dyn_cast<BuildVectorSDNode>(N0); + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + if (BV0 && BV1) + if (SDValue Splat0 = BV0->getSplatValue()) + IsSplat = (Splat0 == BV1->getSplatValue()); + SmallVector<SDValue, 8> Ops; SmallSet<SDValue, 16> DuplicateOps; for (int M : SVN->getMask()) { @@ -15124,23 +15693,25 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, if (S.getOpcode() == ISD::BUILD_VECTOR) { Op = S.getOperand(Idx); } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) { - if (Idx == 0) - Op = S.getOperand(0); + assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index."); + Op = S.getOperand(0); } else { // Operand can't be combined - bail out. return SDValue(); } } - // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is - // fine, but it's likely to generate low-quality code if the target can't - // reconstruct an appropriate shuffle. + // Don't duplicate a non-constant BUILD_VECTOR operand unless we're + // generating a splat; semantically, this is fine, but it's likely to + // generate low-quality code if the target can't reconstruct an appropriate + // shuffle. if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op)) - if (!DuplicateOps.insert(Op).second) + if (!IsSplat && !DuplicateOps.insert(Op).second) return SDValue(); Ops.push_back(Op); } + // BUILD_VECTOR requires all inputs to be of the same type, find the // maximum type and extend them all. EVT SVT = VT.getScalarType(); @@ -15162,7 +15733,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, SelectionDAG &DAG, const TargetLowering &TLI, - bool LegalOperations) { + bool LegalOperations, + bool LegalTypes) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -15190,14 +15762,18 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for // power-of-2 extensions as they are the most likely. 
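Concretely, the mask shape matched for a given power-of-2 Scale is: lane i*Scale reads source element i, and every other lane is undef. A standalone version of that test (mirroring the isAnyExtend predicate used by the loop that follows; the function name here is illustrative):

    #include <cstdio>
    #include <vector>

    // True if Mask implements an any_extend_vector_inreg with the given
    // Scale: lane i*Scale must read source element i, all other lanes undef.
    bool isAnyExtendMask(const std::vector<int> &Mask, unsigned Scale) {
      for (unsigned i = 0; i != Mask.size(); ++i) {
        if (Mask[i] < 0)
          continue; // undef lanes are always acceptable
        if (i % Scale == 0 && Mask[i] == (int)(i / Scale))
          continue; // the extended element sits in the low lane of each group
        return false;
      }
      return true;
    }

    int main() {
      // <0,-1,1,-1,2,-1,3,-1> matches Scale = 2 but not Scale = 4.
      std::vector<int> Mask = {0, -1, 1, -1, 2, -1, 3, -1};
      printf("%d %d\n", isAnyExtendMask(Mask, 2), isAnyExtendMask(Mask, 4));
      return 0;
    }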
for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { + // Check for non power of 2 vector sizes + if (NumElts % Scale != 0) + continue; if (!isAnyExtend(Scale)) continue; EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); - if (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) - return DAG.getBitcast(VT, + if (!LegalTypes || TLI.isTypeLegal(OutVT)) + if (!LegalOperations || + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) + return DAG.getBitcast(VT, DAG.getAnyExtendVectorInReg(N0, SDLoc(SVN), OutVT)); } @@ -15218,9 +15794,7 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, if (!VT.isInteger() || IsBigEndian) return SDValue(); - SDValue N0 = SVN->getOperand(0); - while (N0.getOpcode() == ISD::BITCAST) - N0 = N0.getOperand(0); + SDValue N0 = peekThroughBitcast(SVN->getOperand(0)); unsigned Opcode = N0.getOpcode(); if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG && @@ -15316,6 +15890,84 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask, NewMask); } +/// If the shuffle mask is taking exactly one element from the first vector +/// operand and passing through all other elements from the second vector +/// operand, return the index of the mask element that is choosing an element +/// from the first operand. Otherwise, return -1. +static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) { + int MaskSize = Mask.size(); + int EltFromOp0 = -1; + // TODO: This does not match if there are undef elements in the shuffle mask. + // Should we ignore undefs in the shuffle mask instead? The trade-off is + // removing an instruction (a shuffle), but losing the knowledge that some + // vector lanes are not needed. + for (int i = 0; i != MaskSize; ++i) { + if (Mask[i] >= 0 && Mask[i] < MaskSize) { + // We're looking for a shuffle of exactly one element from operand 0. + if (EltFromOp0 != -1) + return -1; + EltFromOp0 = i; + } else if (Mask[i] != i + MaskSize) { + // Nothing from operand 1 can change lanes. + return -1; + } + } + return EltFromOp0; +} + +/// If a shuffle inserts exactly one element from a source vector operand into +/// another vector operand and we can access the specified element as a scalar, +/// then we can eliminate the shuffle. +static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, + SelectionDAG &DAG) { + // First, check if we are taking one element of a vector and shuffling that + // element into another vector. + ArrayRef<int> Mask = Shuf->getMask(); + SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end()); + SDValue Op0 = Shuf->getOperand(0); + SDValue Op1 = Shuf->getOperand(1); + int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); + if (ShufOp0Index == -1) { + // Commute mask and check again. + ShuffleVectorSDNode::commuteMask(CommutedMask); + ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask); + if (ShufOp0Index == -1) + return SDValue(); + // Commute operands to match the commuted shuffle mask. + std::swap(Op0, Op1); + Mask = CommutedMask; + } + + // The shuffle inserts exactly one element from operand 0 into operand 1. + // Now see if we can access that element as a scalar via a real insert element + // instruction. + // TODO: We can try harder to locate the element as a scalar. Examples: it + // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant. 
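The mask analysis done by getShuffleMaskIndexOfOneElementFromOp0IntoOp1 above fits in a few lines of plain C++ (illustrative standalone version; the LLVM helper takes an ArrayRef<int>, but the logic is the same):

    #include <cstdio>
    #include <vector>

    // Return the index of the single mask element that selects a lane of
    // operand 0 while every other lane passes operand 1 through unchanged;
    // return -1 if the mask does not have that shape (including any undefs,
    // per the TODO above).
    int oneElementFromOp0(const std::vector<int> &Mask) {
      int MaskSize = (int)Mask.size(), EltFromOp0 = -1;
      for (int i = 0; i != MaskSize; ++i) {
        if (Mask[i] >= 0 && Mask[i] < MaskSize) {
          if (EltFromOp0 != -1)
            return -1; // more than one lane taken from operand 0
          EltFromOp0 = i;
        } else if (Mask[i] != i + MaskSize) {
          return -1; // operand 1 lanes must stay in place
        }
      }
      return EltFromOp0;
    }

    int main() {
      // shuffle (insertelt v1, x, 2), v2, <4,5,2,7> keeps v2 except lane 2,
      // so it can be rewritten as insertelt v2, x, 2.
      printf("%d\n", oneElementFromOp0({4, 5, 2, 7})); // 2
      printf("%d\n", oneElementFromOp0({4, 0, 2, 7})); // -1, two lanes of op0
      return 0;
    }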
+ assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() && + "Shuffle mask value must be from operand 0"); + if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT) + return SDValue(); + + auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2)); + if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index]) + return SDValue(); + + // There's an existing insertelement with constant insertion index, so we + // don't need to check the legality/profitability of a replacement operation + // that differs at most in the constant value. The target should be able to + // lower any of those in a similar way. If not, legalization will expand this + // to a scalar-to-vector plus shuffle. + // + // Note that the shuffle may move the scalar from the position that the insert + // element used. Therefore, our new insert element occurs at the shuffle's + // mask index value, not the insert's index value. + // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' + SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf), + Op0.getOperand(2).getValueType()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), + Op1, Op0.getOperand(1), NewInsIndex); +} + SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -15362,6 +16014,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } + // Simplify shuffle mask if a referenced element is UNDEF. + if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG)) + return V; + + if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) + return InsElt; + // A shuffle of a single vector that is a splat can always be folded. if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0)) if (N1->isUndef() && N0Shuf->isSplat()) @@ -15426,7 +16085,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return S; // Match shuffles that can be converted to any_vector_extend_in_reg. - if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) + if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes)) return V; // Combine "truncate_vector_in_reg" style shuffles. @@ -15486,7 +16145,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (TLI.isTypeLegal(ScaleVT) && 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { - int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); @@ -15661,23 +16319,46 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern - // with a VECTOR_SHUFFLE. + // with a VECTOR_SHUFFLE and possible truncate. if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { SDValue InVec = InVal->getOperand(0); SDValue EltNo = InVal->getOperand(1); - - // FIXME: We could support implicit truncation if the shuffle can be - // scaled to a smaller vector scalar type. 
-    ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
-    if (C0 && VT == InVec.getValueType() &&
-        VT.getScalarType() == InVal.getValueType()) {
-      SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
+    auto InVecT = InVec.getValueType();
+    if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
+      SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
       int Elt = C0->getZExtValue();
       NewMask[0] = Elt;
-
-      if (TLI.isShuffleMaskLegal(NewMask, VT))
-        return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
-                                    NewMask);
+      SDValue Val;
+      // If we have an implicit truncate, do the truncate here as long as
+      // it's legal.
+      if (VT.getScalarType() != InVal.getValueType() &&
+          InVal.getValueType().isScalarInteger() &&
+          isTypeLegal(VT.getScalarType())) {
+        Val =
+            DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
+      }
+      if (VT.getScalarType() == InVecT.getScalarType() &&
+          VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
+          TLI.isShuffleMaskLegal(NewMask, VT)) {
+        Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
+                                   DAG.getUNDEF(InVecT), NewMask);
+        // If the initial vector is the correct size, this shuffle is a
+        // valid result.
+        if (VT == InVecT)
+          return Val;
+        // If not, we must truncate the vector.
+        if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+          MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+          SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+          EVT SubVT =
+              EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
+                               VT.getVectorNumElements());
+          Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
+                            ZeroIdx);
+          return Val;
+        }
+      }
     }
   }
@@ -15694,12 +16375,47 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   if (N1.isUndef())
     return N0;
 
+  // For nested INSERT_SUBVECTORs, attempt to combine inner node first to allow
+  // us to pull BITCASTs from input to output.
+  if (N0.hasOneUse() && N0->getOpcode() == ISD::INSERT_SUBVECTOR)
+    if (SDValue NN0 = visitINSERT_SUBVECTOR(N0.getNode()))
+      return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, NN0, N1, N2);
+
   // If this is an insert of an extracted vector into an undef vector, we can
   // just use the input to the extract.
   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
     return N1.getOperand(0);
+
+  // If we are inserting a bitcast value into an undef, with the same
+  // number of elements, just use the bitcast input of the extract.
+  // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
+  //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
+  if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
+      N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+      N1.getOperand(0).getOperand(1) == N2 &&
+      N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
+          VT.getVectorNumElements()) {
+    return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
+  }
+
+  // If both N0 and N1 are bitcast values on which insert_subvector
+  // would make sense, pull the bitcast through.
+  // i.e.
INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 -> + // BITCAST (INSERT_SUBVECTOR N0 N1 N2) + if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) { + SDValue CN0 = N0.getOperand(0); + SDValue CN1 = N1.getOperand(0); + if (CN0.getValueType().getVectorElementType() == + CN1.getValueType().getVectorElementType() && + CN0.getValueType().getVectorNumElements() == + VT.getVectorNumElements()) { + SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), + CN0.getValueType(), CN0, CN1, N2); + return DAG.getBitcast(VT, NewINSERT); + } + } + // Combine INSERT_SUBVECTORs where we are inserting to the same index. // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx ) // --> INSERT_SUBVECTOR( Vec, SubNew, Idx ) @@ -15779,7 +16495,7 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { EVT VT = N->getValueType(0); SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); + SDValue RHS = peekThroughBitcast(N->getOperand(1)); SDLoc DL(N); // Make sure we're not running after operation legalization where it @@ -15790,9 +16506,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (N->getOpcode() != ISD::AND) return SDValue(); - if (RHS.getOpcode() == ISD::BITCAST) - RHS = RHS.getOperand(0); - if (RHS.getOpcode() != ISD::BUILD_VECTOR) return SDValue(); @@ -15945,7 +16658,6 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, /// the DAG combiner loop to avoid it being looked at. bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { - // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x)) // The select + setcc is redundant, because fsqrt returns NaN for X < 0. if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { @@ -16418,7 +17130,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue DAGCombiner::BuildSDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. - if (DAG.getMachineFunction().getFunction()->optForMinSize()) + if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); @@ -16429,7 +17141,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { if (C->isNullValue()) return SDValue(); - std::vector<SDNode*> Built; + std::vector<SDNode *> Built; SDValue S = TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); @@ -16464,7 +17176,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { SDValue DAGCombiner::BuildUDIV(SDNode *N) { // when optimising for minimum size, we don't want to expand a div to a mul // and a shift. 
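For reference, the mul-and-shift expansion that is being skipped at minimum size looks like this for division by 10. The magic constant 0xCCCCCCCD is ceil(2^35 / 10), chosen by hand for this sketch rather than taken from TLI.BuildUDIV; the identity (x * 0xCCCCCCCD) >> 35 == x / 10 holds for every 32-bit x:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Unsigned division by the constant 10, rewritten as a 64-bit multiply
    // plus a shift. Usually faster than a hardware divide, but it takes more
    // instructions, which is why the expansion is avoided when optimizing
    // for size.
    uint32_t udiv10(uint32_t X) {
      return (uint32_t)(((uint64_t)X * 0xCCCCCCCDu) >> 35);
    }

    int main() {
      for (uint64_t X = 0; X <= 0xFFFFFFFFu; X += 65537) // sample the range
        assert(udiv10((uint32_t)X) == (uint32_t)X / 10);
      printf("%u\n", udiv10(12345)); // 1234
      return 0;
    }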
- if (DAG.getMachineFunction().getFunction()->optForMinSize()) + if (DAG.getMachineFunction().getFunction().optForMinSize()) return SDValue(); ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); @@ -16475,7 +17187,7 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { if (C->isNullValue()) return SDValue(); - std::vector<SDNode*> Built; + std::vector<SDNode *> Built; SDValue S = TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); @@ -16760,8 +17472,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { if (Op1->isInvariant() && Op0->writeMem()) return false; - unsigned NumBytes0 = Op0->getMemoryVT().getSizeInBits() >> 3; - unsigned NumBytes1 = Op1->getMemoryVT().getSizeInBits() >> 3; + unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize(); + unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize(); // Check for BaseIndexOffset matching. BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG); @@ -16957,7 +17669,11 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain /// (aliasing node.) SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { - SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. + if (OptLevel == CodeGenOpt::None) + return OldChain; + + // Ops for replacing token factor. + SmallVector<SDValue, 8> Aliases; // Accumulate all the aliases to this node. GatherAllAliases(N, OldChain, Aliases); @@ -16987,6 +17703,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { // to go from a partially-merged state to the desired final // fully-merged state. bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None) + return false; + // This holds the base pointer, index, and the offset in bytes from the base // pointer. BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index b2599b2e17f10..d3c94b5f9e6b4 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -63,6 +63,9 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" @@ -98,11 +101,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -168,8 +168,7 @@ bool FastISel::hasTrivialKill(const Value *V) { // No-op casts are trivially coalesced by fast-isel. if (const auto *Cast = dyn_cast<CastInst>(I)) - if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && - !hasTrivialKill(Cast->getOperand(0))) + if (Cast->isNoopCast(DL) && !hasTrivialKill(Cast->getOperand(0))) return false; // Even the value might have only one use in the LLVM IR, it is possible that @@ -1133,6 +1132,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::lifetime_end: // The donothing intrinsic does, well, nothing. 
case Intrinsic::donothing: + // Neither does the sideeffect intrinsic. + case Intrinsic::sideeffect: // Neither does the assume intrinsic; it's also OK not to codegen its operand. case Intrinsic::assume: return true; @@ -1187,7 +1188,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // into an indirect DBG_VALUE. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - Op->getReg(), 0, DI->getVariable(), DI->getExpression()); + Op->getReg(), DI->getVariable(), DI->getExpression()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) @@ -1212,35 +1213,32 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(0U) - .addImm(DI->getOffset()) - .addMetadata(DI->getVariable()) - .addMetadata(DI->getExpression()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, + DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addImm(DI->getOffset()) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addImm(DI->getOffset()) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addImm(DI->getOffset()) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. - bool IsIndirect = DI->getOffset() != 0; + bool IsIndirect = false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, - DI->getOffset(), DI->getVariable(), DI->getExpression()); + DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. 
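The dbg.value cases above reduce to picking a DBG_VALUE operand form from the kind of the IR value: a small ConstantInt becomes an immediate, a ConstantFP a floating-point immediate, and anything already assigned a register a register operand. A schematic standalone version (the structs and printed shapes are stand-ins for this illustration, not the real MachineInstr operand API):

    #include <cstdint>
    #include <cstdio>
    #include <variant>

    struct CImm { uint64_t Bits; }; // ConstantInt that fits in 64 bits
    struct FPImm { double Val; };   // ConstantFP
    struct VReg { unsigned Num; };  // value already materialized in a vreg

    using DbgOperand = std::variant<CImm, FPImm, VReg>;

    // Dispatch on the operand kind, as the lowering above does on the kind
    // of the IR value feeding the dbg.value intrinsic.
    void emitDbgValue(const DbgOperand &Op) {
      if (const CImm *C = std::get_if<CImm>(&Op))
        printf("DBG_VALUE immediate %llu\n", (unsigned long long)C->Bits);
      else if (const FPImm *F = std::get_if<FPImm>(&Op))
        printf("DBG_VALUE fp immediate %g\n", F->Val);
      else
        printf("DBG_VALUE register %u\n", std::get<VReg>(Op).Num);
    }

    int main() {
      emitDbgValue(CImm{42});
      emitDbgValue(FPImm{3.5});
      emitDbgValue(VReg{7});
      return 0;
    }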
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b736037d71ddc..c7cdb49203b15 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -17,11 +17,14 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -32,12 +35,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index b96c96f0b4df4..cc9b41b4b487f 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -21,14 +21,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "instr-emitter" @@ -673,7 +673,6 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap) { - uint64_t Offset = SD->getOffset(); MDNode *Var = SD->getVariable(); MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); @@ -685,7 +684,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()) - .addImm(Offset) + .addImm(0) .addMetadata(Var) .addMetadata(Expr); } @@ -727,11 +726,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. if (SD->isIndirect()) - MIB.addImm(Offset); - else { - assert(Offset == 0 && "direct value cannot have an offset"); + MIB.addImm(0U); + else MIB.addReg(0U, RegState::Debug); - } MIB.addMetadata(Var); MIB.addMetadata(Expr); @@ -938,10 +935,14 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); break; } - case ISD::EH_LABEL: { - MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel(); + case ISD::EH_LABEL: + case ISD::ANNOTATION_LABEL: { + unsigned Opc = (Node->getOpcode() == ISD::EH_LABEL) + ? 
TargetOpcode::EH_LABEL + : TargetOpcode::ANNOTATION_LABEL; + MCSymbol *S = cast<LabelSDNode>(Node)->getLabel(); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), - TII->get(TargetOpcode::EH_LABEL)).addSym(S); + TII->get(Opc)).addSym(S); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7e4bc3ccb5d39..bb1dc17b7a1b0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1,4 +1,4 @@ -//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===// // // The LLVM Compiler Infrastructure // @@ -11,37 +11,65 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" -#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetOptions.h" +#include <algorithm> +#include <cassert> +#include <cstdint> +#include <tuple> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "legalizedag" namespace { -struct FloatSignAsInt; +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; + uint8_t SignBit; +}; //===----------------------------------------------------------------------===// /// This takes an arbitrary SelectionDAG as input and @@ -54,7 +82,6 @@ struct FloatSignAsInt; /// as part of its processing. For example, if a target does not support a /// 'setcc' instruction efficiently, but does support 'brcc' instruction, this /// will attempt merge setcc and brc instructions into brcc's. -/// class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; @@ -165,11 +192,13 @@ private: public: // Node replacement helpers + void ReplacedNode(SDNode *N) { LegalizedNodes.erase(N); if (UpdatedNodes) UpdatedNodes->insert(N); } + void ReplaceNode(SDNode *Old, SDNode *New) { DEBUG(dbgs() << " ... 
replacing: "; Old->dump(&DAG); dbgs() << " with: "; New->dump(&DAG)); @@ -182,6 +211,7 @@ public: UpdatedNodes->insert(New); ReplacedNode(Old); } + void ReplaceNode(SDValue Old, SDValue New) { DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); dbgs() << " with: "; New->dump(&DAG)); @@ -191,6 +221,7 @@ public: UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); } + void ReplaceNode(SDNode *Old, const SDValue *New) { DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); @@ -205,7 +236,8 @@ public: ReplacedNode(Old); } }; -} + +} // end anonymous namespace /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type @@ -376,6 +408,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. // FIXME: move this to the DAG Combiner! Note that we can't regress due @@ -434,172 +467,184 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { - StoreSDNode *ST = cast<StoreSDNode>(Node); - SDValue Chain = ST->getChain(); - SDValue Ptr = ST->getBasePtr(); - SDLoc dl(Node); - - unsigned Alignment = ST->getAlignment(); - MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); - AAMDNodes AAInfo = ST->getAAInfo(); - - if (!ST->isTruncatingStore()) { - if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - ReplaceNode(ST, OptStore); - return; - } + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDLoc dl(Node); - { - SDValue Value = ST->getValue(); - MVT VT = Value.getSimpleValueType(); - switch (TLI.getOperationAction(ISD::STORE, VT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: { - // If this is an unaligned store and the target doesn't support it, - // expand it. 
- EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); - const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - SDValue Result = TLI.expandUnalignedStore(ST, DAG); - ReplaceNode(SDValue(ST, 0), Result); - } - break; - } - case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res && Res != SDValue(Node, 0)) - ReplaceNode(SDValue(Node, 0), Res); - return; - } - case TargetLowering::Promote: { - MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); - assert(NVT.getSizeInBits() == VT.getSizeInBits() && - "Can only promote stores to same size type"); - Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - break; - } - } - return; - } + unsigned Alignment = ST->getAlignment(); + MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); + AAMDNodes AAInfo = ST->getAAInfo(); + + if (!ST->isTruncatingStore()) { + DEBUG(dbgs() << "Legalizing store operation\n"); + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + SDValue Value = ST->getValue(); + MVT VT = Value.getSimpleValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + // If this is an unaligned store and the target doesn't support it, + // expand it. + EVT MemVT = ST->getMemoryVT(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } else + DEBUG(dbgs() << "Legal store\n"); + break; + } + case TargetLowering::Custom: { + DEBUG(dbgs() << "Trying custom lowering\n"); + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote stores to same size type"); + Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + + DEBUG(dbgs() << "Legalizing truncating store operations\n"); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + auto &DL = DAG.getDataLayout(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, + Alignment, MMOFlags, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. 
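The two-store expansion is easiest to see for an i24 store on a little-endian target: store the low 16 bits at offset 0, shift right by RoundWidth, and store the remaining 8 bits at offset RoundWidth / 8 = 2. A host-side sketch of that arithmetic (assuming a little-endian host so the uint16_t memcpy lays out bytes like the DL.isLittleEndian() branch below; names are illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Store the low 24 bits of Value at Ptr using only power-of-2 sized
    // stores: a 16-bit store of the low bits plus an 8-bit store of the
    // remaining bits, mirroring the TRUNCSTORE:i24 expansion above.
    void truncStore24LE(uint8_t *Ptr, uint32_t Value) {
      const unsigned StWidth = 24;
      unsigned RoundWidth = 1; // largest power of 2 not exceeding StWidth
      while (RoundWidth * 2 <= StWidth)
        RoundWidth *= 2;                                // 16
      const unsigned ExtraWidth = StWidth - RoundWidth; // 8
      assert(ExtraWidth < RoundWidth && RoundWidth % 8 == 0 &&
             ExtraWidth % 8 == 0);

      uint16_t Lo = (uint16_t)Value;               // bottom RoundWidth bits
      uint8_t Hi = (uint8_t)(Value >> RoundWidth); // remaining ExtraWidth bits
      memcpy(Ptr, &Lo, sizeof(Lo));                  // store at offset 0
      memcpy(Ptr + RoundWidth / 8, &Hi, sizeof(Hi)); // store at offset 2
    }

    int main() {
      uint8_t Buf[3] = {0, 0, 0};
      truncStore24LE(Buf, 0xABCDEF);
      assert(Buf[0] == 0xEF && Buf[1] == 0xCD && Buf[2] == 0xAB);
      return 0;
    }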
+ assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (DL.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, Alignment, MMOFlags, AAInfo); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore( + Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); } else { - SDValue Value = ST->getValue(); - - EVT StVT = ST->getMemoryVT(); - unsigned StWidth = StVT.getSizeInBits(); - auto &DL = DAG.getDataLayout(); - - if (StWidth != StVT.getStoreSizeInBits()) { - // Promote to a byte-sized store with upper bits zero if not - // storing an integral number of bytes. For example, promote - // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), - StVT.getStoreSizeInBits()); - Value = DAG.getZeroExtendInReg(Value, dl, StVT); - SDValue Result = - DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - } else if (StWidth & (StWidth - 1)) { - // If not storing a power-of-2 number of bits, expand as two stores. - assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned RoundWidth = 1 << Log2_32(StWidth); - assert(RoundWidth < StWidth); - unsigned ExtraWidth = StWidth - RoundWidth; - assert(ExtraWidth < RoundWidth); - assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && - "Store size not an integral number of bytes!"); - EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); - EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); - SDValue Lo, Hi; - unsigned IncrementSize; - - if (DL.isLittleEndian()) { - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) - // Store the bottom RoundWidth bits. - Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); - Hi = DAG.getNode( - ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(RoundWidth, dl, - TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore( - Chain, dl, Hi, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); - } else { - // Big endian - avoid unaligned stores. - // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X - // Store the top RoundWidth bits. 
- Hi = DAG.getNode( - ISD::SRL, dl, Value.getValueType(), Value, - DAG.getConstant(ExtraWidth, dl, - TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); - - // Store the remaining ExtraWidth bits. - IncrementSize = RoundWidth / 8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, - Ptr.getValueType())); - Lo = DAG.getTruncStore( - Chain, dl, Value, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); - } + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, Alignment, MMOFlags, AAInfo); - // The order of the stores doesn't matter. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - ReplaceNode(SDValue(Node, 0), Result); + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Lo = DAG.getTruncStore( + Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, + MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + } + + // The order of the stores doesn't matter. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { + SDValue Result = TLI.expandUnalignedStore(ST, DAG); + ReplaceNode(SDValue(ST, 0), Result); + } + break; + } + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + SDValue Result; + + // TRUNCSTORE:i16 i32 -> STORE i16 + if (TLI.isTypeLegal(StVT)) { + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + Alignment, MMOFlags, AAInfo); } else { - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { - default: llvm_unreachable("This action is not supported yet!"); - case TargetLowering::Legal: { - EVT MemVT = ST->getMemoryVT(); - unsigned AS = ST->getAddressSpace(); - unsigned Align = ST->getAlignment(); - // If this is an unaligned store and the target doesn't support it, - // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - SDValue Result = TLI.expandUnalignedStore(ST, DAG); - ReplaceNode(SDValue(ST, 0), Result); - } - break; - } - case TargetLowering::Custom: { - SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Res && Res != SDValue(Node, 0)) - ReplaceNode(SDValue(Node, 0), Res); - return; - } - case TargetLowering::Expand: - assert(!StVT.isVector() && - "Vector Stores are handled in LegalizeVectorOps"); - - // TRUNCSTORE:i16 i32 -> STORE i16 - assert(TLI.isTypeLegal(StVT) && - "Do not know how to expand this store!"); - Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); - ReplaceNode(SDValue(Node, 0), Result); - break; - } + // The in-memory type isn't legal. Truncate to the type it would promote + // to, and then do a truncstore. + Value = DAG.getNode(ISD::TRUNCATE, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), StVT), + Value); + Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + StVT, Alignment, MMOFlags, AAInfo); } + + ReplaceNode(SDValue(Node, 0), Result); + break; } + } } void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { @@ -611,6 +656,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { + DEBUG(dbgs() << "Legalizing non-extending load operation\n"); MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); @@ -629,13 +675,13 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } break; } - case TargetLowering::Custom: { + case TargetLowering::Custom: if (SDValue Res = TLI.LowerOperation(RVal, DAG)) { RVal = Res; RChain = Res.getValue(1); } break; - } + case TargetLowering::Promote: { MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); assert(NVT.getSizeInBits() == VT.getSizeInBits() && @@ -660,6 +706,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { return; } + DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); @@ -795,7 +842,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { case TargetLowering::Custom: isCustom = true; LLVM_FALLTHROUGH; - case TargetLowering::Legal: { + case TargetLowering::Legal: Value = SDValue(Node, 0); Chain = SDValue(Node, 1); @@ -816,8 +863,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } break; - } - case TargetLowering::Expand: + + case TargetLowering::Expand: { EVT DestVT = Node->getValueType(0); if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { // If the source type is not legal, see if there is a legal extload to @@ -883,6 +930,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Result.getValue(1); break; } + } } // Since loads produce two values, make sure to remember that we legalized @@ -907,6 +955,7 @@ getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; + case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; @@ -932,7 +981,9 @@ getStrictFPOpcodeAction(const TargetLowering 
&TLI, unsigned Opcode, EVT VT) { void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); - if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + // Allow illegal target nodes and illegal registers. + if (Node->getOpcode() == ISD::TargetConstant || + Node->getOpcode() == ISD::Register) return; #ifndef NDEBUG @@ -946,7 +997,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || TLI.isTypeLegal(Op.getValueType()) || - Op.getOpcode() == ISD::TargetConstant) && + Op.getOpcode() == ISD::TargetConstant || + Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); #endif @@ -983,11 +1035,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), InnerType); break; } - case ISD::ATOMIC_STORE: { + case ISD::ATOMIC_STORE: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(2).getValueType()); break; - } case ISD::SELECT_CC: case ISD::SETCC: case ISD::BR_CC: { @@ -1072,6 +1123,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } break; case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: case ISD::STRICT_FPOW: case ISD::STRICT_FPOWI: case ISD::STRICT_FSIN: @@ -1090,7 +1142,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(), Node->getValueType(0)); break; - default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -1141,8 +1192,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (SAO != Op2) NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO); } + break; } - break; } if (NewNode != Node) { @@ -1151,8 +1202,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } switch (Action) { case TargetLowering::Legal: + DEBUG(dbgs() << "Legal node: nothing to do\n"); return; - case TargetLowering::Custom: { + case TargetLowering::Custom: + DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { @@ -1160,6 +1213,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; if (Node->getNumValues() == 1) { + DEBUG(dbgs() << "Successfully custom legalized node\n"); // We can just directly replace this node with the lowered value. 
ReplaceNode(SDValue(Node, 0), Res); return; @@ -1168,11 +1222,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) ResultVals.push_back(Res.getValue(i)); + DEBUG(dbgs() << "Successfully custom legalized node\n"); ReplaceNode(Node, ResultVals.data()); return; } + DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; - } case TargetLowering::Expand: if (ExpandNode(Node)) return; @@ -1198,13 +1253,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: break; - case ISD::LOAD: { + case ISD::LOAD: return LegalizeLoadOps(Node); - } - case ISD::STORE: { + case ISD::STORE: return LegalizeStoreOps(Node); } - } } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -1240,7 +1293,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // If the index is dependent on the store we will introduce a cycle when // creating the load (the load uses the index, and by replacing the chain // we will make the index dependent on the load). Also, the store might be - // dependent on the extractelement and introduce a cycle when creating + // dependent on the extractelement and introduce a cycle when creating // the load. if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) || ST->hasPredecessor(Op.getNode())) @@ -1361,22 +1414,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo); } -namespace { -/// Keeps track of state when getting the sign of a floating-point value as an -/// integer. -struct FloatSignAsInt { - EVT FloatVT; - SDValue Chain; - SDValue FloatPtr; - SDValue IntPtr; - MachinePointerInfo IntPointerInfo; - MachinePointerInfo FloatPointerInfo; - SDValue IntValue; - APInt SignMask; - uint8_t SignBit; -}; -} - /// Bitcast a floating-point value to an integer value. Only bitcast the part /// containing the sign bit if the target has no integer value capable of /// holding all bits of the floating-point value. @@ -1753,8 +1790,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, // We do this in two phases; first to check the legality of the shuffles, // and next, assuming that all shuffles are legal, to create the new nodes. for (int Phase = 0; Phase < 2; ++Phase) { - SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals, - NewIntermedVals; + SmallVector<std::pair<SDValue, SmallVector<int, 16>>, 16> IntermedVals, + NewIntermedVals; for (unsigned i = 0; i < NumElems; ++i) { SDValue V = Node->getOperand(i); if (V.isUndef()) @@ -1977,10 +2014,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position and that the return types match. 
SDValue TCChain = InChain; - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain) && - (RetTy == F->getReturnType() || F->getReturnType()->isVoidTy()); + (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy()); if (isTailCall) InChain = TCChain; @@ -1996,10 +2033,13 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); - if (!CallInfo.second.getNode()) + if (!CallInfo.second.getNode()) { + DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); + } + DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); return CallInfo.first; } @@ -2285,9 +2325,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? - + DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { - // simple 32-bit [signed|unsigned] integer to float/double expansion + DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " + "expansion\n"); // Get the stack frame index of a 8 byte buffer. SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); @@ -2352,6 +2393,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // and in all alternate rounding modes. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { + DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = @@ -2372,9 +2414,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } - // Implementation of unsigned i64 to f32. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); // For unsigned conversions, convert them to signed conversions using the // algorithm from the x86_64 __floatundidf in compiler_rt. if (!isSigned) { @@ -2498,7 +2540,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, unsigned OpToUse = 0; // Scan for the appropriate larger type to use. - while (1) { + while (true) { NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); assert(NewInTy.isInteger() && "Ran out of possibilities!"); @@ -2539,7 +2581,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, unsigned OpToUse = 0; // Scan for the appropriate larger type to use. - while (1) { + while (true) { NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); @@ -2559,7 +2601,6 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, // Otherwise, try a larger type. } - // Okay, we found the operation and type to use. 
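// [Sketch, not part of the patch: the TwoP52/TwoP84 constants used by the
//  unsigned i64 -> f64 path of ExpandLegalINT_TO_FP above, as host code.
//  ORing a 32-bit half into the mantissa of 2^52 (or 2^84) yields exactly
//  2^52 + lo (or 2^84 + hi * 2^32); a single FSUB then folds both biases.]
#include <cassert>
#include <cstdint>
#include <cstring>
static double u64ToF64(uint64_t X) {
  uint64_t LoBits = (X & 0xFFFFFFFFull) | 0x4330000000000000ull; // 2^52 | lo
  uint64_t HiBits = (X >> 32) | 0x4530000000000000ull;           // 2^84 | hi
  double Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof(Lo));  // the BITCASTs in the DAG version
  std::memcpy(&Hi, &HiBits, sizeof(Hi));
  return Lo + (Hi - 19342813118337666422669312.0);  // subtract 2^84 + 2^52
}
int main() {
  assert(u64ToF64(0) == 0.0);
  assert(u64ToF64(0x8000000000000000ull) == 9223372036854775808.0); // 2^63
  return 0;
}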
SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); @@ -2745,7 +2786,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); case ISD::CTLZ: { EVT VT = Op.getValueType(); - unsigned len = VT.getSizeInBits(); + unsigned Len = VT.getSizeInBits(); if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { EVT SetCCVT = getSetCCResultType(VT); @@ -2753,7 +2794,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, - DAG.getConstant(len, dl, VT), CTLZ); + DAG.getConstant(Len, dl, VT), CTLZ); } // for now, we do this: @@ -2766,7 +2807,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // // Ref: "Hacker's Delight" by Henry Warren EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + for (unsigned i = 0; (1U << i) <= (Len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); Op = DAG.getNode(ISD::OR, dl, VT, Op, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); @@ -2778,11 +2819,22 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This trivially expands to CTTZ. return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); case ISD::CTTZ: { + EVT VT = Op.getValueType(); + unsigned Len = VT.getSizeInBits(); + + if (TLI.isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { + EVT SetCCVT = getSetCCResultType(VT); + SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); + return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + DAG.getConstant(Len, dl, VT), CTTZ); + } + // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } // Ref: "Hacker's Delight" by Henry Warren - EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), DAG.getNode(ISD::SUB, dl, VT, Op, @@ -2799,6 +2851,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { + DEBUG(dbgs() << "Trying to expand node\n"); SmallVector<SDValue, 8> Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -2983,8 +3036,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // NOTE: we could fall back on load/store here too for targets without // SRA. However, it is doubtful that any exist. 
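// [Sketch, not part of the patch: the CTLZ/CTTZ fallback expansions above,
//  written out for uint32_t so the two Hacker's Delight tricks are visible.]
#include <cassert>
#include <cstdint>
static unsigned popcount32(uint32_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1)  // clear the lowest set bit until none remain
    ++N;
  return N;
}
// CTLZ: smear the highest set bit into every lower position; the zeros left
// in the complement are exactly the leading zeros.
static unsigned ctlz32(uint32_t X) {
  for (unsigned Shift = 1; Shift <= 16; Shift <<= 1)
    X |= X >> Shift;
  return popcount32(~X);
}
// CTTZ: ~x & (x - 1) is all-ones below the lowest set bit, zero elsewhere.
static unsigned cttz32(uint32_t X) {
  return popcount32(~X & (X - 1));
}
int main() {
  assert(ctlz32(1) == 31 && ctlz32(0x80000000u) == 0 && ctlz32(0) == 32);
  assert(cttz32(8) == 3 && cttz32(1) == 0 && cttz32(0) == 32);
  return 0;
}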
EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - if (VT.isVector()) - ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarSizeInBits() - ExtraVT.getScalarSizeInBits(); SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); @@ -3062,10 +3113,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::INSERT_SUBVECTOR: Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); break; - case ISD::CONCAT_VECTORS: { + case ISD::CONCAT_VECTORS: Results.push_back(ExpandVectorBuildThroughStack(Node)); break; - } case ISD::SCALAR_TO_VECTOR: Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); break; @@ -3083,14 +3133,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); if (!TLI.isTypeLegal(EltVT)) { - EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); // BUILD_VECTOR operands are allowed to be wider than the element type. // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept // it. if (NewEltVT.bitsLT(EltVT)) { - // Convert shuffle node. // If original node was v4i64 and the new EltVT is i32, // cast operands to v8i32 and re-build the mask. @@ -3261,6 +3309,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; case ISD::FP_TO_FP16: + DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); @@ -3457,7 +3506,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) // Sub: // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) - // SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, @@ -3666,10 +3714,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2.getOperand(0), Tmp2.getOperand(1), Node->getOperand(2)); } else { - // We test only the i1 bit. Skip the AND if UNDEF. - Tmp3 = (Tmp2.isUndef()) ? Tmp2 : - DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, - DAG.getConstant(1, dl, Tmp2.getValueType())); + // We test only the i1 bit. Skip the AND if UNDEF or another AND. + if (Tmp2.isUndef() || + (Tmp2.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Tmp2.getOperand(1)) && + dyn_cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1)) + Tmp3 = Tmp2; + else + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, + DAG.getConstant(1, dl, Tmp2.getValueType())); Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, DAG.getCondCode(ISD::SETNE), Tmp3, DAG.getConstant(0, dl, Tmp3.getValueType()), @@ -3865,17 +3918,20 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original node with the legalized result. 
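// [Sketch, not part of the patch: the SADDO/SSUBO sign tests from the
//  expansion above, on plain i32. "Sign" below is the SETGE test x >= 0,
//  matching the LHSSign/RHSSign/SumSign nodes built by the legalizer.]
#include <cassert>
#include <cstdint>
static bool saddOverflows(int32_t L, int32_t R) {
  int32_t Sum = (int32_t)((uint32_t)L + (uint32_t)R);  // wrapping add
  bool LSign = L >= 0, RSign = R >= 0, SumSign = Sum >= 0;
  return LSign == RSign && LSign != SumSign;  // same signs, sum sign flips
}
static bool ssubOverflows(int32_t L, int32_t R) {
  int32_t Diff = (int32_t)((uint32_t)L - (uint32_t)R);  // wrapping sub
  bool LSign = L >= 0, RSign = R >= 0, DiffSign = Diff >= 0;
  return LSign != RSign && LSign != DiffSign;  // signs differ, diff flips
}
int main() {
  assert(saddOverflows(INT32_MAX, 1) && !saddOverflows(INT32_MAX, -1));
  assert(ssubOverflows(INT32_MIN, 1) && !ssubOverflows(0, 1));
  return 0;
}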
- if (Results.empty())
+ if (Results.empty()) {
+ DEBUG(dbgs() << "Cannot expand node\n");
return false;
+ }
+
+ DEBUG(dbgs() << "Successfully expanded node\n");
ReplaceNode(Node, Results.data());
return true;
}
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
+ DEBUG(dbgs() << "Trying to convert node to libcall\n");
SmallVector<SDValue, 8> Results;
SDLoc dl(Node);
- SDValue Tmp1, Tmp2, Tmp3, Tmp4;
unsigned Opc = Node->getOpcode();
switch (Opc) {
case ISD::ATOMIC_FENCE: {
@@ -4057,6 +4113,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::REM_PPCF128));
break;
case ISD::FMA:
+ case ISD::STRICT_FMA:
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
break;
@@ -4126,8 +4183,11 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
}
// Replace the original node with the legalized result.
- if (!Results.empty())
+ if (!Results.empty()) {
+ DEBUG(dbgs() << "Successfully converted node to libcall\n");
ReplaceNode(Node, Results.data());
+ } else
+ DEBUG(dbgs() << "Could not convert node to libcall\n");
}
// Determine the vector type to use in place of an original scalar element when
@@ -4141,6 +4201,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI,
}
void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
+ DEBUG(dbgs() << "Trying to promote node\n");
SmallVector<SDValue, 8> Results;
MVT OVT = Node->getSimpleValueType(0);
if (Node->getOpcode() == ISD::UINT_TO_FP ||
@@ -4369,7 +4430,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FREM:
case ISD::FMINNUM:
case ISD::FMAXNUM:
- case ISD::FPOW: {
+ case ISD::FPOW:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2,
@@ -4377,8 +4438,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3,
DAG.getIntPtrConstant(0, dl)));
break;
- }
- case ISD::FMA: {
+ case ISD::FMA:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2));
@@ -4387,7 +4447,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3),
DAG.getIntPtrConstant(0, dl)));
break;
- }
case ISD::FCOPYSIGN:
case ISD::FPOWI: {
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
@@ -4419,13 +4478,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
case ISD::FLOG10:
case ISD::FABS:
case ISD::FEXP:
- case ISD::FEXP2: {
+ case ISD::FEXP2:
Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2,
DAG.getIntPtrConstant(0, dl)));
break;
- }
case ISD::BUILD_VECTOR: {
MVT EltVT = OVT.getVectorElementType();
MVT NewEltVT = NVT.getVectorElementType();
@@ -4579,8 +4637,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
// Replace the original node with the legalized result.
- if (!Results.empty())
+ if (!Results.empty()) {
+ DEBUG(dbgs() << "Successfully promoted node\n");
ReplaceNode(Node, Results.data());
+ } else
+ DEBUG(dbgs() << "Could not promote node\n");
}
/// This is the entry point for the file.
@@ -4602,7 +4663,7 @@ void SelectionDAG::Legalize() {
// nodes with their original operands intact.
Legalization can produce
// new nodes which may themselves need to be legalized. Iterate until all
// nodes have been legalized.
- for (;;) {
+ while (true) {
bool AnyLegalized = false;
for (auto NI = allnodes_end(); NI != allnodes_begin();) {
--NI;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 75fec7bd1d485..29f0bb475b083 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -40,8 +40,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
SDValue Res = SDValue();
// See if the target wants to custom expand this node.
- if (CustomLowerNode(N, N->getValueType(ResNo), true))
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ DEBUG(dbgs() << "Node has been custom expanded, done\n");
return;
+ }
switch (N->getOpcode()) {
default:
@@ -568,10 +570,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) {
SDValue Mask = N->getOperand(0);
- EVT OpTy = N->getOperand(1).getValueType();
- // Promote all the way up to the canonical SetCC type.
- Mask = PromoteTargetBoolean(Mask, OpTy);
SDValue LHS = GetPromotedInteger(N->getOperand(1));
SDValue RHS = GetPromotedInteger(N->getOperand(2));
return DAG.getNode(ISD::VSELECT, SDLoc(N),
@@ -773,7 +772,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
if (ResNo == 1)
return PromoteIntRes_Overflow(N);
- llvm_unreachable("Not implemented");
+
+ // We need to sign-extend the operands so the carry value computed by the
+ // wide operation will be equivalent to the carry value computed by the
+ // narrow operation.
+ // An ADDCARRY can generate carry only if any of the operands has its
+ // most significant bit set. Sign extension propagates the most significant
+ // bit into the higher bits which means the extra bit that the narrow
+ // addition would need (i.e. the carry) will be propagated through the higher
+ // bits of the wide addition.
+ // A SUBCARRY can generate borrow only if LHS < RHS and this property will be
+ // preserved by sign extension.
+ SDValue LHS = SExtPromotedInteger(N->getOperand(0));
+ SDValue RHS = SExtPromotedInteger(N->getOperand(1));
+
+ EVT ValueVTs[] = {LHS.getValueType(), N->getValueType(1)};
+
+ // Do the arithmetic in the wide type.
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs),
+ LHS, RHS, N->getOperand(2));
+
+ // Update the users of the original carry/borrow value.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+ return SDValue(Res.getNode(), 0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
@@ -885,8 +907,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n");
SDValue Res = SDValue();
- if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
+ DEBUG(dbgs() << "Node has been custom lowered, done\n");
return false;
+ }
switch (N->getOpcode()) {
default:
@@ -1206,24 +1230,23 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
// When the data operand has illegal type, we should legalize the data
// operand first. The mask will be promoted/split/widened according to
// the data operand type.
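// [Sketch, not part of the patch: an exhaustive check of the sign-extension
//  argument in PromoteIntRes_ADDSUBCARRY above, for the ADDCARRY case with i8
//  promoted to i32. It assumes the usual two's-complement narrowing casts.]
#include <cassert>
#include <cstdint>
int main() {
  for (uint32_t L = 0; L < 256; ++L)
    for (uint32_t R = 0; R < 256; ++R)
      for (uint32_t CIn = 0; CIn < 2; ++CIn) {
        bool NarrowCarry = (L + R + CIn) > 0xFF;     // bit 8 of the i8 add
        uint32_t WL = (uint32_t)(int32_t)(int8_t)L;  // SExtPromotedInteger
        uint32_t WR = (uint32_t)(int32_t)(int8_t)R;
        bool WideCarry = ((uint64_t)WL + WR + CIn) > 0xFFFFFFFFull;
        assert(NarrowCarry == WideCarry);  // wide carry equals narrow carry
      }
  return 0;
}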
- if (TLI.isTypeLegal(DataVT)) + if (TLI.isTypeLegal(DataVT)) { Mask = PromoteTargetBoolean(Mask, DataVT); - else { - if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) - return PromoteIntOp_MSTORE(N, 3); - - else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) - return WidenVecOp_MSTORE(N, 3); - - else { - assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); - return SplitVecOp_MSTORE(N, 3); - } + // Update in place. + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[2] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } + + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) + return PromoteIntOp_MSTORE(N, 3); + if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) + return WidenVecOp_MSTORE(N, 3); + assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); + return SplitVecOp_MSTORE(N, 3); } else { // Data operand assert(OpNo == 3 && "Unexpected operand for promotion"); DataOp = GetPromotedInteger(DataOp); - Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); TruncateStore = true; } @@ -1250,6 +1273,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, // The Mask EVT DataVT = N->getValueType(0); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else if (OpNo == 4) { + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); @@ -1270,6 +1296,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, // The Mask EVT DataVT = N->getValue().getValueType(); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else if (OpNo == 4) { + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -3224,8 +3253,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getTruncStore( Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); @@ -3260,8 +3288,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); // Store the lowest ExcessBits bits in the second half. 
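// [Sketch, not part of the patch: why PromoteIntOp_MGATHER/MSCATTER above
//  sign-extend a promoted index. A negative narrow index must stay negative,
//  or Base + Index * Scale addresses the wrong element. Hypothetical values.]
#include <cassert>
#include <cstdint>
int main() {
  int Buffer[16] = {};
  int *Base = Buffer + 8;
  int8_t Idx = -3;                       // narrow index below the base
  int64_t SExt = (int64_t)Idx;           // -3: same element as the i8 op
  int64_t ZExt = (int64_t)(uint8_t)Idx;  // 253: far out of bounds
  assert(Base + SExt == Buffer + 5);
  assert(ZExt == 253);                   // would address Base + 253
  return 0;
}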
Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -3462,7 +3489,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - EVT InElemTy = OutVT.getVectorElementType(); EVT OutElemTy = NOutVT.getVectorElementType(); unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); @@ -3471,15 +3497,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { assert(NumElem * NumOperands == NumOutElem && "Unexpected number of elements"); + // If the input type is legal and we can promote it to a legal type with the + // same element size, go ahead do that to create a new concat. + if (getTypeAction(N->getOperand(0).getValueType()) == + TargetLowering::TypeLegal) { + EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem); + if (TLI.isTypeLegal(InPromotedTy)) { + SmallVector<SDValue, 8> Ops(NumOperands); + for (unsigned i = 0; i < NumOperands; ++i) { + Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy, + N->getOperand(i)); + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops); + } + } + // Take the elements from the first vector. SmallVector<SDValue, 8> Ops(NumOutElem); for (unsigned i = 0; i < NumOperands; ++i) { SDValue Op = N->getOperand(i); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteInteger) + Op = GetPromotedInteger(Op); + EVT SclrTy = Op.getValueType().getVectorElementType(); + assert(NumElem == Op.getValueType().getVectorNumElements() && + "Unexpected number of elements"); + for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op, + ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op, DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 001eed9fb8f62..b60d7bca498ad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -14,7 +14,9 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/CommandLine.h" @@ -222,15 +224,21 @@ bool DAGTypeLegalizer::run() { assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); - if (IgnoreNodeResults(N)) + DEBUG(dbgs() << "Legalizing node: "; N->dump()); + if (IgnoreNodeResults(N)) { + DEBUG(dbgs() << "Ignoring node results\n"); goto ScanOperands; + } // Scan the values produced by the node, checking to see if any result // types are illegal. 
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); + DEBUG(dbgs() << "Analyzing result type: " << + ResultVT.getEVTString() << "\n"); switch (getTypeAction(ResultVT)) { case TargetLowering::TypeLegal: + DEBUG(dbgs() << "Legal result type\n"); break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results @@ -287,9 +295,12 @@ ScanOperands: if (IgnoreNodeResults(N->getOperand(i).getNode())) continue; - EVT OpVT = N->getOperand(i).getValueType(); + const auto Op = N->getOperand(i); + DEBUG(dbgs() << "Analyzing operand: "; Op.dump()); + EVT OpVT = Op.getValueType(); switch (getTypeAction(OpVT)) { case TargetLowering::TypeLegal: + DEBUG(dbgs() << "Legal operand\n"); continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's @@ -832,6 +843,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); + // Transfer debug values. Don't invalidate the source debug value until it's + // been transferred to the high and low bits. + if (DAG.getDataLayout().isBigEndian()) { + DAG.transferDbgValues(Op, Hi, 0, Hi.getValueSizeInBits(), false); + DAG.transferDbgValues(Op, Lo, Hi.getValueSizeInBits(), + Lo.getValueSizeInBits()); + } else { + DAG.transferDbgValues(Op, Lo, 0, Lo.getValueSizeInBits(), false); + DAG.transferDbgValues(Op, Hi, Lo.getValueSizeInBits(), + Hi.getValueSizeInBits()); + } + // Remember that this is the result of the node. std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; assert(!Entry.first.getNode() && "Node already expanded"); @@ -1002,8 +1025,13 @@ bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { // Update the widening map. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); - for (unsigned i = 0, e = Results.size(); i != e; ++i) - SetWidenedVector(SDValue(N, i), Results[i]); + for (unsigned i = 0, e = Results.size(); i != e; ++i) { + // If this is a chain output just replace it. + if (Results[i].getValueType() == MVT::Other) + ReplaceValueWith(SDValue(N, i), Results[i]); + else + SetWidenedVector(SDValue(N, i), Results[i]); + } return true; } @@ -1117,23 +1145,6 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } -/// Widen the given target boolean to a target boolean of the given type. -/// The boolean vector is widened and then promoted to match the target boolean -/// type of the given ValVT. -SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT, - bool WithZeroes) { - SDLoc dl(Bool); - EVT BoolVT = Bool.getValueType(); - - assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() && - TLI.isTypeLegal(ValVT) && - "Unexpected types in WidenTargetBoolean"); - EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(), - ValVT.getVectorNumElements()); - Bool = ModifyToType(Bool, WideVT, WithZeroes); - return PromoteTargetBoolean(Bool, ValVT); -} - /// Return the lower LoVT bits of Op in Lo and the upper HiVT bits in Hi. 
void DAGTypeLegalizer::SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -1142,9 +1153,14 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op, assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == Op.getValueSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + unsigned ReqShiftAmountInBits = + Log2_32_Ceil(Op.getValueType().getSizeInBits()); + MVT ShiftAmountTy = + TLI.getScalarShiftAmountTy(DAG.getDataLayout(), Op.getValueType()); + if (ReqShiftAmountInBits > ShiftAmountTy.getSizeInBits()) + ShiftAmountTy = MVT::getIntegerVT(NextPowerOf2(ReqShiftAmountInBits)); Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, - DAG.getConstant(LoVT.getSizeInBits(), dl, - TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getConstant(LoVT.getSizeInBits(), dl, ShiftAmountTy)); Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c46d1b04804c9..64cb80e0d8538 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -18,9 +18,9 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/Target/TargetLowering.h" namespace llvm { @@ -89,7 +89,8 @@ private: /// Pretend all of this node's results are legal. bool IgnoreNodeResults(SDNode *N) const { - return N->getOpcode() == ISD::TargetConstant; + return N->getOpcode() == ISD::TargetConstant || + N->getOpcode() == ISD::Register; } /// For integer nodes that are below legal width, this map indicates what @@ -182,10 +183,6 @@ private: SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); - /// Modify Bit Vector to match SetCC result type of ValVT. - /// The bit vector is widened with zeroes when WithZeroes is true. - SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false); - void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -400,18 +397,22 @@ private: /// Given an operand Op of Float type, returns the integer if the Op is not /// supported in target HW and converted to the integer. /// The integer contains exactly the same bits as Op - only the type changed. - /// For example, if Op is an f32 which was softened to an i32, then this method - /// returns an i32, the bits of which coincide with those of Op. + /// For example, if Op is an f32 which was softened to an i32, then this + /// method returns an i32, the bits of which coincide with those of Op. /// If the Op can be efficiently supported in target HW or the operand must /// stay in a register, the Op is not converted to an integer. /// In that case, the given op is returned. 
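// [Sketch, not part of the patch: SplitInteger above on concrete values. The
//  shift amount type only needs Log2_32_Ceil(64) = 6 bits to represent the
//  shift of 32, which is the condition the patch now checks and widens for.]
#include <cassert>
#include <cstdint>
int main() {
  uint64_t Op = 0x1122334455667788ull;
  uint8_t ShiftAmt = 32;                     // fits easily in 6 bits
  uint32_t Lo = (uint32_t)Op;                // ISD::TRUNCATE
  uint32_t Hi = (uint32_t)(Op >> ShiftAmt);  // ISD::SRL, then ISD::TRUNCATE
  assert(Lo == 0x55667788u && Hi == 0x11223344u);
  return 0;
}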
SDValue GetSoftenedFloat(SDValue Op) { - SDValue &SoftenedOp = SoftenedFloats[Op]; - if (!SoftenedOp.getNode() && - isSimpleLegalType(Op.getValueType())) + auto Iter = SoftenedFloats.find(Op); + if (Iter == SoftenedFloats.end()) { + assert(isSimpleLegalType(Op.getValueType()) && + "Operand wasn't converted to integer?"); return Op; + } + + SDValue &SoftenedOp = Iter->second; + assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?"); RemapValue(SoftenedOp); - assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); @@ -618,7 +619,6 @@ private: SDValue ScalarizeVecRes_SETCC(SDNode *N); SDValue ScalarizeVecRes_UNDEF(SDNode *N); SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); - SDValue ScalarizeVecRes_VSETCC(SDNode *N); // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -727,7 +727,6 @@ private: SDValue WidenVecRes_SETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); - SDValue WidenVecRes_VSETCC(SDNode* N); SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index f3306151d864b..993465ae9dc21 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -484,8 +484,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment, St->getMemOperand()->getFlags(), AAInfo); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 9355dbe77f94e..74970ab5792c0 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -1,4 +1,4 @@ -//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// // // The LLVM Compiler Infrastructure // @@ -27,15 +27,34 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include <cassert> +#include <cstdint> +#include <iterator> +#include <utility> + using namespace llvm; namespace { + class VectorLegalizer { SelectionDAG& DAG; const TargetLowering &TLI; - bool Changed; // Keep track of whether anything changed + bool Changed = false; // Keep track of whether anything changed /// For nodes that are of legal width, and that have 
more than one use, this
/// map indicates what regularized operand to use. This allows us to avoid
@@ -128,12 +147,15 @@ class VectorLegalizer {
SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned);
public:
+ VectorLegalizer(SelectionDAG& dag) :
+ DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
+
/// \brief Begin legalizing the vector operations in the DAG.
bool Run();
- VectorLegalizer(SelectionDAG& dag) :
- DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {}
};
+} // end anonymous namespace
+
bool VectorLegalizer::Run() {
// Before we start legalizing vector nodes, check if there are any vectors.
bool HasVectors = false;
@@ -475,10 +497,10 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
"Can't promote a vector with multiple results!");
EVT VT = Op.getValueType();
- EVT NewVT;
+ EVT NewVT = VT;
unsigned NewOpc;
- while (1) {
- NewVT = VT.widenIntegerVectorElementType(*DAG.getContext());
+ while (true) {
+ NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext());
assert(NewVT.isSimple() && "Promoting to a non-simple vector type!");
if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) {
NewOpc = ISD::FP_TO_SINT;
@@ -490,12 +512,19 @@ SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) {
}
}
- SDLoc loc(Op);
- SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0));
- return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted);
+ SDLoc dl(Op);
+ SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0));
+
+ // Assert that the converted value fits in the original type. If it doesn't
+ // (e.g. because the value being converted is too big), then the result of the
+ // original operation was undefined anyway, so the assert is still correct.
+ Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ?
ISD::AssertZext + : ISD::AssertSext, + dl, NewVT, Promoted, + DAG.getValueType(VT.getScalarType())); + return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); } - SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); @@ -503,7 +532,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { EVT SrcEltVT = SrcVT.getScalarType(); unsigned NumElem = SrcVT.getVectorNumElements(); - SDValue NewChain; SDValue Value; if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { @@ -534,7 +562,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { unsigned Offset = 0; unsigned RemainingBytes = SrcVT.getStoreSize(); SmallVector<SDValue, 8> LoadVals; - while (RemainingBytes > 0) { SDValue ScalarLoad; unsigned LoadBytes = WideBytes; @@ -560,9 +587,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { RemainingBytes -= LoadBytes; Offset += LoadBytes; - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getConstant(LoadBytes, dl, - BasePTR.getValueType())); + + BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -1117,8 +1143,6 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { return DAG.getBuildVector(VT, dl, Ops); } -} - bool SelectionDAG::LegalizeVectors() { return VectorLegalizer(*this).Run(); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6aa3270883f08..8f2320f52a0f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -243,7 +243,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is // legal and was not scalarized. - // See the similar logic in ScalarizeVecRes_VSETCC + // See the similar logic in ScalarizeVecRes_SETCC if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { Op = GetScalarizedVector(Op); } else { @@ -307,7 +307,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { SDLoc DL(N); // The vselect result and true/value operands needs scalarizing, but it's // not a given that the Cond does. For instance, in AVX512 v1i1 is legal. - // See the similar logic in ScalarizeVecRes_VSETCC + // See the similar logic in ScalarizeVecRes_SETCC if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { Cond = GetScalarizedVector(Cond); } else { @@ -380,21 +380,6 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { N->getOperand(4)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { - assert(N->getValueType(0).isVector() == - N->getOperand(0).getValueType().isVector() && - "Scalar/Vector type mismatch"); - - if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N); - - SDValue LHS = GetScalarizedVector(N->getOperand(0)); - SDValue RHS = GetScalarizedVector(N->getOperand(1)); - SDLoc DL(N); - - // Turn it into a scalar SETCC. 
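// [Sketch, not part of the patch: PromoteFP_TO_INT above in scalar form.
//  When f32 -> i8 is not legal, convert through a wider legal integer and
//  truncate; the AssertZext/AssertSext added by the patch only records that
//  any defined result already fits in the narrow type.]
#include <cassert>
#include <cstdint>
static uint8_t fpToU8ViaU32(float F) {
  uint32_t Wide = (uint32_t)F;  // the legal, wider FP_TO_UINT
  return (uint8_t)Wide;         // ISD::TRUNCATE back to the requested type
}
int main() {
  assert(fpToU8ViaU32(200.7f) == 200 && fpToU8ViaU32(0.0f) == 0);
  return 0;
}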
- return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); -} - SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); } @@ -408,7 +393,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { return GetScalarizedVector(N->getOperand(Op)); } -SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operand types must be vectors"); @@ -461,7 +446,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { N->dump(&DAG); dbgs() << "\n"; #endif - llvm_unreachable("Do not know how to scalarize this operator's operand!"); + report_fatal_error("Do not know how to scalarize this operator's " + "operand!\n"); case ISD::BITCAST: Res = ScalarizeVecOp_BITCAST(N); break; @@ -1068,34 +1054,57 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, if (CustomLowerNode(N, N->getValueType(0), true)) return; - // Spill the vector to the stack. + // Make the vector elements byte-addressable if they aren't already. EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); + if (VecVT.getScalarSizeInBits() < 8) { + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); + // Extend the element type to match if needed. + if (EltVT.bitsGT(Elt.getValueType())) + Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt); + } + + // Spill the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = - DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, + MachinePointerInfo::getUnknownStack(MF), EltVT); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); // Load the Lo part from the stack slot. - Lo = - DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); + Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo); // Increment the pointer to the other part. - unsigned IncrementSize = Lo.getValueSizeInBits() / 8; + unsigned IncrementSize = LoVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), MinAlign(Alignment, IncrementSize)); + + // If we adjusted the original type, we need to truncate the results. 
+ std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + if (LoVT != Lo.getValueType()) + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo); + if (HiVT != Hi.getValueType()) + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -1130,8 +1139,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, Alignment, MMOFlags, AAInfo); @@ -1283,10 +1291,19 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc DL(N); std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - // Split the input. + // If the input also splits, handle it directly. Otherwise split it by hand. SDValue LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + if (getTypeAction(N->getOperand(0).getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(0), LL, LH); + else + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + + if (getTypeAction(N->getOperand(1).getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(1), RL, RH); + else + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -1753,30 +1770,25 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Make the vector elements byte-addressable if they aren't already. SDLoc dl(N); EVT EltVT = VecVT.getVectorElementType(); - if (EltVT.getSizeInBits() < 8) { - SmallVector<SDValue, 4> ElementOps; - for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) { - ElementOps.push_back(DAG.getAnyExtOrTrunc( - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, - DAG.getConstant(i, dl, MVT::i8)), - dl, MVT::i8)); - } - + if (VecVT.getScalarSizeInBits() < 8) { EltVT = MVT::i8; VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, VecVT.getVectorNumElements()); - Vec = DAG.getBuildVector(VecVT, dl, ElementOps); + Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); } // Store the vector to the stack. SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); // Load back the required element. 
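// [Sketch, not part of the patch: the spill-and-reload idiom used by
//  SplitVecOp_EXTRACT_VECTOR_ELT above, for a variable index the target
//  cannot extract directly.]
#include <cassert>
#include <cstdint>
#include <cstring>
static int32_t extractElt(const int32_t (&Vec)[4], unsigned Idx) {
  int32_t Slot[4];                       // CreateStackTemporary
  std::memcpy(Slot, Vec, sizeof(Slot));  // store the whole vector
  int32_t Elt;                           // load back the required element
  std::memcpy(&Elt, (const char *)Slot + Idx * sizeof(int32_t), sizeof(Elt));
  return Elt;
}
int main() {
  int32_t V[4] = {10, 20, 30, 40};
  assert(extractElt(V, 2) == 30);
  return 0;
}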
StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT); + return DAG.getExtLoad( + ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { @@ -1886,9 +1898,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); - MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType()); - MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType()); - // if Alignment is equal to the vector size, // take the half of it for the second part unsigned SecondHalfAlignment = @@ -1955,7 +1964,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - SDValue Lo, Hi; + SDValue Lo; MachineMemOperand *MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), @@ -1970,13 +1979,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi}; - Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); - - // Build a factor node to remember that this store is independent of the - // other one. - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + // The order of the Scatter operation after split is well defined. The "Hi" + // part comes after the "Lo". So these two operations should be chained one + // after another. + SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2007,8 +2015,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { AAInfo); // Increment the pointer to the other half. 
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
 if (isTruncating)
 Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
@@ -2919,30 +2926,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
 ISD::LoadExtType ExtType = N->getExtensionType();
 SDLoc dl(N);
- if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector)
- Mask = GetWidenedVector(Mask);
- else {
- EVT BoolVT = getSetCCResultType(WidenVT);
-
- // We can't use ModifyToType() because we should fill the mask with
- // zeroes
- unsigned WidenNumElts = BoolVT.getVectorNumElements();
- unsigned MaskNumElts = MaskVT.getVectorNumElements();
-
- unsigned NumConcat = WidenNumElts / MaskNumElts;
- SmallVector<SDValue, 16> Ops(NumConcat);
- SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT);
- Ops[0] = Mask;
- for (unsigned i = 1; i != NumConcat; ++i)
- Ops[i] = ZeroVal;
-
- Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops);
- }
+ // The mask should be widened as well
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WidenVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
 SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
 Mask, Src0, N->getMemoryVT(),
 N->getMemOperand(), ExtType,
- N->isExpandingLoad());
+ N->isExpandingLoad());
 // Legalize the chain result - switch anything that used the old chain to
 // use the new one.
 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -2953,12 +2946,16 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
 SDValue Mask = N->getMask();
+ EVT MaskVT = Mask.getValueType();
 SDValue Src0 = GetWidenedVector(N->getValue());
 unsigned NumElts = WideVT.getVectorNumElements();
 SDLoc dl(N);
 // The mask should be widened as well
- Mask = WidenTargetBoolean(Mask, WideVT, true);
+ EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
+ MaskVT.getVectorElementType(),
+ WideVT.getVectorNumElements());
+ Mask = ModifyToType(Mask, WideMaskVT, true);
 // Widen the Index operand
 SDValue Index = N->getIndex();
@@ -3032,7 +3029,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
 // Make a new Mask node, with a legal result VT.
 SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0; i < InMask->getNumOperands(); ++i)
+ for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
 Ops.push_back(InMask->getOperand(i));
 SDValue Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);
@@ -3065,12 +3062,9 @@
 } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
 unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
 EVT SubVT = Mask->getValueType(0);
- SmallVector<SDValue, 16> SubConcatOps(NumSubVecs);
- SubConcatOps[0] = Mask;
- for (unsigned i = 1; i < NumSubVecs; ++i)
- SubConcatOps[i] = DAG.getUNDEF(SubVT);
- Mask =
- DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubConcatOps);
+ SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT));
+ SubOps[0] = Mask;
+ Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
 }
 assert((Mask->getValueType(0) == ToMaskVT) &&
@@ -3105,7 +3099,8 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) {
 // If this is a split VSELECT that was previously already handled, do
 // nothing.
- if (Cond->getValueType(0).getScalarSizeInBits() != 1) + EVT CondVT = Cond->getValueType(0); + if (CondVT.getScalarSizeInBits() != 1) return SDValue(); EVT VSelVT = N->getValueType(0); @@ -3129,6 +3124,14 @@ SDValue DAGTypeLegalizer::WidenVSELECTAndMask(SDNode *N) { EVT SetCCResVT = getSetCCResultType(SetCCOpVT); if (SetCCResVT.getScalarSizeInBits() == 1) return SDValue(); + } else if (CondVT.getScalarType() == MVT::i1) { + // If there is support for an i1 vector mask (or only scalar i1 conditions), + // don't touch. + while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal) + CondVT = TLI.getTypeToTransformTo(Ctx, CondVT); + + if (CondVT.getScalarType() == MVT::i1) + return SDValue(); } // Get the VT and operands for VSELECT, and widen if needed. @@ -3236,19 +3239,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { N->getOperand(1), InOp1, InOp2, N->getOperand(4)); } -SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { - assert(N->getValueType(0).isVector() == - N->getOperand(0).getValueType().isVector() && - "Scalar/Vector type mismatch"); - if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N); - - EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue InOp1 = GetWidenedVector(N->getOperand(0)); - SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, - InOp1, InOp2, N->getOperand(2)); -} - SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getUNDEF(WidenVT); @@ -3279,7 +3269,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } -SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && "Operands must be vectors"); @@ -3556,6 +3546,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { + assert(OpNo == 3 && "Can widen only data operand of mstore"); MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); EVT MaskVT = Mask.getValueType(); @@ -3564,25 +3555,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { SDValue WideVal = GetWidenedVector(StVal); SDLoc dl(N); - if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) - Mask = GetWidenedVector(Mask); - else { - // The mask should be widened as well. - EVT BoolVT = getSetCCResultType(WideVal.getValueType()); - // We can't use ModifyToType() because we should fill the mask with - // zeroes. - unsigned WidenNumElts = BoolVT.getVectorNumElements(); - unsigned MaskNumElts = MaskVT.getVectorNumElements(); - - unsigned NumConcat = WidenNumElts / MaskNumElts; - SmallVector<SDValue, 16> Ops(NumConcat); - SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); - Ops[0] = Mask; - for (unsigned i = 1; i != NumConcat; ++i) - Ops[i] = ZeroVal; + // The mask should be widened as well. 
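// Why ModifyToType is told to fill with zeroes, in a standalone sketch
// (widenMask is a hypothetical stand-in): lanes added by widening must stay
// inactive, otherwise the widened masked store could write past the end of
// the original data.
#include <cstddef>
#include <vector>

std::vector<bool> widenMask(const std::vector<bool> &Mask, unsigned WideN) {
  std::vector<bool> Wide(WideN, false); // padded lanes default to "off"
  for (std::size_t I = 0; I != Mask.size(); ++I)
    Wide[I] = Mask[I];
  return Wide;
}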
+ EVT WideVT = WideVal.getValueType(); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), + WideVT.getVectorNumElements()); + Mask = ModifyToType(Mask, WideMaskVT, true); - Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); - } assert(Mask.getValueType().getVectorNumElements() == WideVal.getValueType().getVectorNumElements() && "Mask and data vectors should have the same number of elements"); @@ -3596,15 +3575,18 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); + EVT MaskVT = Mask.getValueType(); // Widen the value. SDValue WideVal = GetWidenedVector(DataOp); EVT WideVT = WideVal.getValueType(); - unsigned NumElts = WideVal.getValueType().getVectorNumElements(); + unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); // The mask should be widened as well. - Mask = WidenTargetBoolean(Mask, WideVT, true); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), + MaskVT.getVectorElementType(), NumElts); + Mask = ModifyToType(Mask, WideMaskVT, true); // Widen index. SDValue Index = MSC->getIndex(); @@ -3806,8 +3788,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, while (LdWidth > 0) { unsigned Increment = NewVTWidth / 8; Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, BasePtr.getValueType())); + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); SDValue L; if (LdWidth < NewVTWidth) { @@ -3839,7 +3820,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } LdOps.push_back(L); - + LdOp = L; LdWidth -= NewVTWidth; } @@ -3929,10 +3910,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, - DAG.getConstant(Offset, dl, - BasePtr.getValueType())); + SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, Align, MMOFlags, AAInfo); @@ -3987,9 +3965,8 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, - BasePtr.getValueType())); + + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store. @@ -4008,9 +3985,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, MinAlign(Align, Offset), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; - BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getConstant(Increment, dl, - BasePtr.getValueType())); + BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type. 
Idx = Idx * NewVTWidth / ValEltWidth; @@ -4053,10 +4028,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { - SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, - DAG.getConstant(Offset, dl, - BasePtr.getValueType())); + SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); SDValue EOp = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index a21b4c7332540..379f0dcef513d 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -22,12 +22,12 @@ #include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 237d541b4cb97..cf92907a8b5f9 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -20,32 +20,31 @@ namespace llvm { -class MDNode; +class DIVariable; +class DIExpression; class SDNode; class Value; -/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// Holds the information from a dbg_value node through SDISel. /// We do not use SDValue here to avoid including its header. - class SDDbgValue { public: enum DbgValueKind { - SDNODE = 0, // value is the result of an expression - CONST = 1, // value is a constant - FRAMEIX = 2 // value is contents of a stack location + SDNODE = 0, ///< Value is the result of an expression. + CONST = 1, ///< Value is a constant. + FRAMEIX = 2 ///< Value is contents of a stack location. }; private: union { struct { - SDNode *Node; // valid for expressions - unsigned ResNo; // valid for expressions + SDNode *Node; ///< Valid for expressions. + unsigned ResNo; ///< Valid for expressions. } s; - const Value *Const; // valid for constants - unsigned FrameIx; // valid for stack objects + const Value *Const; ///< Valid for constants. + unsigned FrameIx; ///< Valid for stack objects. } u; - MDNode *Var; - MDNode *Expr; - uint64_t Offset; + DIVariable *Var; + DIExpression *Expr; DebugLoc DL; unsigned Order; enum DbgValueKind kind; @@ -53,71 +52,65 @@ private: bool Invalid = false; public: - // Constructor for non-constants. - SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, - uint64_t off, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), - IsIndirect(indir) { + /// Constructor for non-constants. + SDDbgValue(DIVariable *Var, DIExpression *Expr, SDNode *N, unsigned R, + bool indir, DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(indir) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; } - // Constructor for constants. 
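// The storage pattern of SDDbgValue in miniature, as an illustrative
// stand-in (MiniDbgValue is not the real class): a kind tag discriminates a
// union, so one small object can describe a node result, a constant, or a
// frame index without paying for all three.
#include <cassert>

struct MiniDbgValue {
  enum Kind { Node, Const, FrameIx } K;
  union {
    struct { void *N; unsigned ResNo; } S; // valid when K == Node
    const void *C;                         // valid when K == Const
    unsigned FI;                           // valid when K == FrameIx
  } U;
  unsigned getFrameIx() const {
    assert(K == FrameIx && "not a stack location");
    return U.FI;
  }
};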
- SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, - DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), - IsIndirect(false) { + /// Constructor for constants. + SDDbgValue(DIVariable *Var, DIExpression *Expr, const Value *C, DebugLoc dl, + unsigned O) + : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { kind = CONST; u.Const = C; } - // Constructor for frame indices. - SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, + /// Constructor for frame indices. + SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl, unsigned O) - : Var(Var), Expr(Expr), Offset(off), DL(std::move(dl)), Order(O), - IsIndirect(false) { + : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { kind = FRAMEIX; u.FrameIx = FI; } - // Returns the kind. + /// Returns the kind. DbgValueKind getKind() const { return kind; } - // Returns the MDNode pointer for the variable. - MDNode *getVariable() const { return Var; } + /// Returns the DIVariable pointer for the variable. + DIVariable *getVariable() const { return Var; } - // Returns the MDNode pointer for the expression. - MDNode *getExpression() const { return Expr; } + /// Returns the DIExpression pointer for the expression. + DIExpression *getExpression() const { return Expr; } - // Returns the SDNode* for a register ref + /// Returns the SDNode* for a register ref SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } - // Returns the ResNo for a register ref + /// Returns the ResNo for a register ref unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } - // Returns the Value* for a constant + /// Returns the Value* for a constant const Value *getConst() const { assert (kind==CONST); return u.Const; } - // Returns the FrameIx for a stack object + /// Returns the FrameIx for a stack object unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } - // Returns whether this is an indirect value. + /// Returns whether this is an indirect value. bool isIndirect() const { return IsIndirect; } - // Returns the offset. - uint64_t getOffset() const { return Offset; } - - // Returns the DebugLoc. + /// Returns the DebugLoc. DebugLoc getDebugLoc() const { return DL; } - // Returns the SDNodeOrder. This is the order of the preceding node in the - // input. + /// Returns the SDNodeOrder. This is the order of the preceding node in the + /// input. unsigned getOrder() const { return Order; } - // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" - // property. A SDDbgValue is invalid if the SDNode that produces the value is - // deleted. + /// setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + /// property. A SDDbgValue is invalid if the SDNode that produces the value is + /// deleted. 
void setIsInvalidated() { Invalid = true; }
 bool isInvalidated() const { return Invalid; }
 };
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 1379940932772..698e14453d1d7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -18,13 +18,13 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InlineAsm.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
 using namespace llvm;
 #define DEBUG_TYPE "pre-RA-sched"
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 70b1fa77a0991..49f304c8cc869 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1,4 +1,4 @@
-//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
+//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
 //
 // The LLVM Compiler Infrastructure
 //
@@ -16,23 +16,47 @@
 //===----------------------------------------------------------------------===//
 #include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/DataLayout.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetLowering.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
-#include <climits>
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <memory>
+#include <utility>
+#include <vector>
+
 using namespace llvm;
 #define DEBUG_TYPE "pre-RA-sched"
@@ -46,6 +70,7 @@ static RegisterScheduler
 burrListDAGScheduler("list-burr",
 "Bottom-up register reduction list scheduling",
 createBURRListDAGScheduler);
+
 static RegisterScheduler
 sourceListDAGScheduler("source",
 "Similar to list-burr but schedules in source "
@@ -105,6 +130,7 @@ static cl::opt<unsigned> AvgIPC(
 cl::desc("Average inst/cycle when no target itinerary exists."));
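// The cl::opt pattern used above, shown in isolation. This compiles against
// LLVM's Support headers; the flag name and default below are made up for
// the example, not flags that exist in LLVM.
#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<unsigned> ExampleAvgIPC(
    "example-avg-ipc", llvm::cl::Hidden, llvm::cl::init(1),
    llvm::cl::desc("Average inst/cycle assumed by this sketch."));

static unsigned cyclesFor(unsigned NumInsts) {
  // Once the command line is parsed, a cl::opt reads like a plain unsigned.
  return (NumInsts + ExampleAvgIPC - 1) / ExampleAvgIPC; // ceiling division
}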
namespace { + //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler /// implementation. This supports both top-down and bottom-up scheduling. @@ -112,7 +138,6 @@ namespace { class ScheduleDAGRRList : public ScheduleDAGSDNodes { private: /// NeedLatency - True if the scheduler will make use of latency information. - /// bool NeedLatency; /// AvailableQueue - The priority queue to use for the available SUnits. @@ -122,13 +147,13 @@ private: /// been issued, but their results are not ready yet (due to the latency of /// the operation). Once the operands becomes available, the instruction is /// added to the AvailableQueue. - std::vector<SUnit*> PendingQueue; + std::vector<SUnit *> PendingQueue; /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; /// CurCycle - The current scheduler state corresponds to this cycle. - unsigned CurCycle; + unsigned CurCycle = 0; /// MinAvailableCycle - Cycle of the soonest available instruction. unsigned MinAvailableCycle; @@ -147,7 +172,9 @@ private: // Collect interferences between physical register use/defs. // Each interference is an SUnit and set of physical registers. SmallVector<SUnit*, 4> Interferences; - typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT; + + using LRegsMapT = DenseMap<SUnit *, SmallVector<unsigned, 4>>; + LRegsMapT LRegsMap; /// Topo - A topological ordering for SUnits which permits fast IsReachable @@ -163,9 +190,8 @@ public: SchedulingPriorityQueue *availqueue, CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), - NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + NeedLatency(needlatency), AvailableQueue(availqueue), Topo(SUnits, nullptr) { - const TargetSubtargetInfo &STI = mf.getSubtarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); @@ -267,6 +293,7 @@ private: return !NeedLatency; } }; + } // end anonymous namespace /// GetCostForDef - Looks up the register class and cost for a given definition. @@ -319,13 +346,13 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { - DEBUG(dbgs() - << "********** List Scheduling BB#" << BB->getNumber() - << " '" << BB->getName() << "' **********\n"); + DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); CurCycle = 0; IssueCount = 0; - MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; + MinAvailableCycle = + DisableSchedCycles ? 0 : std::numeric_limits<unsigned>::max(); NumLiveRegs = 0; // Allocate slots for each physical register, plus one for a special register // to track the virtual resource of a calling sequence. @@ -409,7 +436,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner, unsigned NestLevel, const TargetInstrInfo *TII) { SDNode *N = Outer; - for (;;) { + while (true) { if (N == Inner) return true; // For a TokenFactor, examine each operand. There may be multiple ways @@ -456,7 +483,7 @@ static bool IsChainDependent(SDNode *Outer, SDNode *Inner, static SDNode * FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, const TargetInstrInfo *TII) { - for (;;) { + while (true) { // For a TokenFactor, examine each operand. 
There may be multiple ways // to get to the CALLSEQ_BEGIN, but we need to find the path with the // most nesting in order to ensure that we find the corresponding match. @@ -550,6 +577,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { unsigned NestLevel = 0; unsigned MaxNest = 0; SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + assert(N && "Must find call sequence start"); SUnit *Def = &SUnits[N->getNodeId()]; CallSeqEndForStart[Def] = SU; @@ -571,7 +599,7 @@ void ScheduleDAGRRList::ReleasePending() { // If the available queue is empty, it is safe to reset MinAvailableCycle. if (AvailableQueue->empty()) - MinAvailableCycle = UINT_MAX; + MinAvailableCycle = std::numeric_limits<unsigned>::max(); // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. @@ -791,7 +819,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { AvailableQueue->remove(PredSU); } - assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); + assert(PredSU->NumSuccsLeft < std::numeric_limits<unsigned>::max() && + "NumSuccsLeft will overflow!"); ++PredSU->NumSuccsLeft; } @@ -821,9 +850,13 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SUNode = SUNode->getGluedNode()) { if (SUNode->isMachineOpcode() && SUNode->getMachineOpcode() == TII->getCallFrameSetupOpcode()) { + SUnit *SeqEnd = CallSeqEndForStart[SU]; + assert(SeqEnd && "Call sequence start/end must be known"); + assert(!LiveRegDefs[CallResource]); + assert(!LiveRegGens[CallResource]); ++NumLiveRegs; LiveRegDefs[CallResource] = SU; - LiveRegGens[CallResource] = CallSeqEndForStart[SU]; + LiveRegGens[CallResource] = SeqEnd; } } @@ -835,6 +868,8 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { if (SUNode->isMachineOpcode() && SUNode->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[CallResource]); + assert(LiveRegGens[CallResource]); --NumLiveRegs; LiveRegDefs[CallResource] = nullptr; LiveRegGens[CallResource] = nullptr; @@ -891,7 +926,7 @@ void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { if (LookAhead == 0) return; - std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); + std::vector<SUnit *>::const_iterator I = (Sequence.end() - LookAhead); unsigned HazardCycle = (*I)->getHeight(); for (auto E = Sequence.end(); I != E; ++I) { SUnit *SU = *I; @@ -1319,8 +1354,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { // If we're in the middle of scheduling a call, don't begin scheduling // another call. Also, don't allow any physical registers to be live across // the call. - if ((Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) || - (Node->getMachineOpcode() == TII->getCallFrameSetupOpcode())) { + if (Node->getMachineOpcode() == TII->getCallFrameDestroyOpcode()) { // Check the special calling-sequence resource. unsigned CallResource = TRI->getNumRegs(); if (LiveRegDefs[CallResource]) { @@ -1390,27 +1424,32 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); - while (CurSU) { - SmallVector<unsigned, 4> LRegs; - if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) - break; - DEBUG(dbgs() << " Interfering reg " << - (LRegs[0] == TRI->getNumRegs() ? 
"CallResource" - : TRI->getName(LRegs[0])) - << " SU #" << CurSU->NodeNum << '\n'); - std::pair<LRegsMapT::iterator, bool> LRegsPair = - LRegsMap.insert(std::make_pair(CurSU, LRegs)); - if (LRegsPair.second) { - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - Interferences.push_back(CurSU); - } - else { - assert(CurSU->isPending && "Interferences are pending"); - // Update the interference with current live regs. - LRegsPair.first->second = LRegs; + auto FindAvailableNode = [&]() { + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + DEBUG(dbgs() << " Interfering reg "; + if (LRegs[0] == TRI->getNumRegs()) + dbgs() << "CallResource"; + else + dbgs() << printReg(LRegs[0], TRI); + dbgs() << " SU #" << CurSU->NodeNum << '\n'); + std::pair<LRegsMapT::iterator, bool> LRegsPair = + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + if (LRegsPair.second) { + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + } + else { + assert(CurSU->isPending && "Interferences are pending"); + // Update the interference with current live regs. + LRegsPair.first->second = LRegs; + } + CurSU = AvailableQueue->pop(); } - CurSU = AvailableQueue->pop(); - } + }; + FindAvailableNode(); if (CurSU) return CurSU; @@ -1423,7 +1462,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Try unscheduling up to the point where it's safe to schedule // this node. SUnit *BtSU = nullptr; - unsigned LiveCycle = UINT_MAX; + unsigned LiveCycle = std::numeric_limits<unsigned>::max(); for (unsigned Reg : LRegs) { if (LiveRegGens[Reg]->getHeight() < LiveCycle) { BtSU = LiveRegGens[Reg]; @@ -1447,13 +1486,16 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // If one or more successors has been unscheduled, then the current // node is no longer available. - if (!TrySU->isAvailable || !TrySU->NodeQueueId) + if (!TrySU->isAvailable || !TrySU->NodeQueueId) { + DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); CurSU = AvailableQueue->pop(); - else { + } else { + DEBUG(dbgs() << "TrySU available\n"); // Available and in AvailableQueue AvailableQueue->remove(TrySU); CurSU = TrySU; } + FindAvailableNode(); // Interferences has been mutated. We must break. break; } @@ -1540,7 +1582,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { while (AvailableQueue->empty() && !PendingQueue.empty()) { // Advance the cycle to free resources. Skip ahead to the next ready SU. - assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + assert(MinAvailableCycle < std::numeric_limits<unsigned>::max() && + "MinAvailableCycle uninitialized"); AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); } } @@ -1553,17 +1596,11 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { #endif } -//===----------------------------------------------------------------------===// -// RegReductionPriorityQueue Definition -//===----------------------------------------------------------------------===// -// -// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers -// to reduce register pressure. 
-// namespace { + class RegReductionPQBase; -struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { +struct queue_sort { bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } }; @@ -1571,6 +1608,7 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { template<class SF> struct reverse_sort : public queue_sort { SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} bool operator()(SUnit* left, SUnit* right) const { @@ -1590,6 +1628,7 @@ struct bu_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool operator()(SUnit* left, SUnit* right) const; @@ -1603,8 +1642,8 @@ struct src_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - src_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) {} + + src_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool operator()(SUnit* left, SUnit* right) const; }; @@ -1617,8 +1656,8 @@ struct hybrid_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - hybrid_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) {} + + hybrid_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1634,8 +1673,8 @@ struct ilp_ls_rr_sort : public queue_sort { }; RegReductionPQBase *SPQ; - ilp_ls_rr_sort(RegReductionPQBase *spq) - : SPQ(spq) {} + + ilp_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} bool isReady(SUnit *SU, unsigned CurCycle) const; @@ -1644,8 +1683,8 @@ struct ilp_ls_rr_sort : public queue_sort { class RegReductionPQBase : public SchedulingPriorityQueue { protected: - std::vector<SUnit*> Queue; - unsigned CurQueueId; + std::vector<SUnit *> Queue; + unsigned CurQueueId = 0; bool TracksRegPressure; bool SrcOrder; @@ -1656,13 +1695,12 @@ protected: const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const TargetLowering *TLI; - ScheduleDAGRRList *scheduleDAG; + ScheduleDAGRRList *scheduleDAG = nullptr; // SethiUllmanNumbers - The SethiUllman number for each node. std::vector<unsigned> SethiUllmanNumbers; /// RegPressure - Tracking current reg pressure per register class. 
- /// std::vector<unsigned> RegPressure; /// RegLimit - Tracking the number of allocatable registers per register @@ -1677,9 +1715,8 @@ public: const TargetInstrInfo *tii, const TargetRegisterInfo *tri, const TargetLowering *tli) - : SchedulingPriorityQueue(hasReadyFilter), - CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), - MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { + : SchedulingPriorityQueue(hasReadyFilter), TracksRegPressure(tracksrp), + SrcOrder(srcorder), MF(mf), TII(tii), TRI(tri), TLI(tli) { if (TracksRegPressure) { unsigned NumRC = TRI->getNumRegClasses(); RegLimit.resize(NumRC); @@ -1730,7 +1767,7 @@ public: void remove(SUnit *SU) override { assert(!Queue.empty() && "Queue is empty!"); assert(SU->NodeQueueId != 0 && "Not in queue!"); - std::vector<SUnit *>::iterator I = find(Queue, SU); + std::vector<SUnit *>::iterator I = llvm::find(Queue, SU); if (I != std::prev(Queue.end())) std::swap(*I, Queue.back()); Queue.pop_back(); @@ -1759,7 +1796,7 @@ protected: }; template<class SF> -static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { +static SUnit *popFromQueueImpl(std::vector<SUnit *> &Q, SF &Picker) { std::vector<SUnit *>::iterator Best = Q.begin(); for (auto I = std::next(Q.begin()), E = Q.end(); I != E; ++I) if (Picker(*Best, *I)) @@ -1772,7 +1809,7 @@ static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { } template<class SF> -SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +SUnit *popFromQueue(std::vector<SUnit *> &Q, SF &Picker, ScheduleDAG *DAG) { #ifndef NDEBUG if (DAG->StressSched) { reverse_sort<SF> RPicker(Picker); @@ -1783,6 +1820,13 @@ SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { return popFromQueueImpl(Q, Picker); } +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Definition +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. +// template<class SF> class RegReductionPriorityQueue : public RegReductionPQBase { SF Picker; @@ -1815,7 +1859,7 @@ public: #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump(ScheduleDAG *DAG) const override { // Emulate pop() without clobbering NodeQueueIds. 
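// The queue discipline used by popFromQueueImpl, reduced to ints as a hedged
// sketch (popBest is hypothetical and Q must be non-empty): scan for the
// best entry, swap it to the back, pop it off. Draining a *copy* this way is
// exactly how dump() emulates pop() without touching the live queue.
#include <cstddef>
#include <utility>
#include <vector>

int popBest(std::vector<int> &Q) {
  std::size_t Best = 0;
  for (std::size_t I = 1; I != Q.size(); ++I)
    if (Q[Best] < Q[I]) // the "Picker" preferring the larger value
      Best = I;
  std::swap(Q[Best], Q.back()); // O(1) removal from the middle
  int V = Q.back();
  Q.pop_back();
  return V;
}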
- std::vector<SUnit*> DumpQueue = Queue; + std::vector<SUnit *> DumpQueue = Queue; SF DumpPicker = Picker; while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); @@ -1826,17 +1870,11 @@ public: #endif }; -typedef RegReductionPriorityQueue<bu_ls_rr_sort> -BURegReductionPriorityQueue; - -typedef RegReductionPriorityQueue<src_ls_rr_sort> -SrcRegReductionPriorityQueue; +using BURegReductionPriorityQueue = RegReductionPriorityQueue<bu_ls_rr_sort>; +using SrcRegReductionPriorityQueue = RegReductionPriorityQueue<src_ls_rr_sort>; +using HybridBURRPriorityQueue = RegReductionPriorityQueue<hybrid_ls_rr_sort>; +using ILPBURRPriorityQueue = RegReductionPriorityQueue<ilp_ls_rr_sort>; -typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> -HybridBURRPriorityQueue; - -typedef RegReductionPriorityQueue<ilp_ls_rr_sort> -ILPBURRPriorityQueue; } // end anonymous namespace //===----------------------------------------------------------------------===// @@ -2855,7 +2893,6 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, /// This results in the store being scheduled immediately /// after N, which shortens the U->N live range, reducing /// register pressure. -/// void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Visit all the nodes in topological order, working top-down. for (SUnit &SU : *SUnits) { @@ -3022,7 +3059,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { // Public Constructor Functions //===----------------------------------------------------------------------===// -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3036,7 +3073,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3050,7 +3087,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); @@ -3066,7 +3103,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, return SD; } -llvm::ScheduleDAGSDNodes * +ScheduleDAGSDNodes * llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 3c8526ebb7029..c09b47af26a66 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -23,14 +23,14 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define 
DEBUG_TYPE "pre-RA-sched"
@@ -709,18 +709,17 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
 // source order number as N.
 MachineBasicBlock *BB = Emitter.getBlock();
 MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
- ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
- for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
- if (DVs[i]->isInvalidated())
+ for (auto DV : DAG->GetDbgValues(N)) {
+ if (DV->isInvalidated())
 continue;
- unsigned DVOrder = DVs[i]->getOrder();
+ unsigned DVOrder = DV->getOrder();
 if (!Order || DVOrder == Order) {
- MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap);
 if (DbgMI) {
- Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ Orders.push_back({DVOrder, DbgMI});
 BB->insert(InsertPos, DbgMI);
 }
- DVs[i]->setIsInvalidated();
+ DV->setIsInvalidated();
 }
 }
 }
@@ -742,16 +741,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
 }
 MachineBasicBlock *BB = Emitter.getBlock();
- if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() ||
+ auto IP = Emitter.getInsertPos();
+ if (IP == BB->begin() || BB->back().isPHI() ||
 // Fast-isel may have inserted some instructions, in which case the
 // BB->back().isPHI() test will not fire when we want it to.
- std::prev(Emitter.getInsertPos())->isPHI()) {
+ std::prev(IP)->isPHI()) {
 // Did not insert any instruction.
- Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr));
+ Orders.push_back({Order, (MachineInstr *)nullptr});
 return;
 }
- Orders.push_back(std::make_pair(Order, &*std::prev(Emitter.getInsertPos())));
+ Orders.push_back({Order, &*std::prev(IP)});
 ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
 }
@@ -856,8 +856,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
 MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI();
 // Sort the source order instructions and use the order to insert debug
- // values.
- std::sort(Orders.begin(), Orders.end(), less_first());
+ // values. Use stable_sort so that DBG_VALUEs are inserted in the same order
+ // regardless of the host's implementation of std::sort.
+ std::stable_sort(Orders.begin(), Orders.end(), less_first());
+ std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
+ [](const SDDbgValue *LHS, const SDDbgValue *RHS) {
+ return LHS->getOrder() < RHS->getOrder();
+ });
 SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
 SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
@@ -869,10 +874,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
 // Insert all SDDbgValue's whose order(s) are before "Order".
 if (!MI)
 continue;
- for (; DI != DE &&
- (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+ for (; DI != DE; ++DI) {
+ if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
+ break;
 if ((*DI)->isInvalidated())
 continue;
+
 MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
 if (DbgMI) {
 if (!LastOrder)
@@ -891,11 +898,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
 // Add trailing DbgValue's before the terminator. FIXME: May want to add
 // some of them before one or more conditional branches?
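// Why stable_sort (and not std::sort) keeps the emitted schedule
// deterministic, in a standalone sketch: several debug values often carry
// the same order number, std::sort may permute such ties differently per
// host, while stable_sort preserves their original relative order.
#include <algorithm>
#include <utility>
#include <vector>

void sortByOrder(std::vector<std::pair<unsigned, int>> &Entries) {
  std::stable_sort(Entries.begin(), Entries.end(),
                   [](const std::pair<unsigned, int> &L,
                      const std::pair<unsigned, int> &R) {
                     return L.first < R.first; // compare order numbers only
                   });
  // Entries tied on .first keep their input order on every platform.
}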
SmallVector<MachineInstr*, 8> DbgMIs; - while (DI != DE) { - if (!(*DI)->isInvalidated()) - if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) - DbgMIs.push_back(DbgMI); - ++DI; + for (; DI != DE; ++DI) { + if ((*DI)->isInvalidated()) + continue; + assert((*DI)->getOrder() >= LastOrder && + "emitting DBG_VALUE out of order"); + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); } MachineBasicBlock *InsertBB = Emitter.getBlock(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 631cb34717c4f..07b46b9183ab7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -25,13 +25,13 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <climits> using namespace llvm; @@ -93,9 +93,8 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGVLIW::Schedule() { - DEBUG(dbgs() - << "********** List Scheduling BB#" << BB->getNumber() - << " '" << BB->getName() << "' **********\n"); + DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. BuildSchedGraph(AA); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 16f425dc7969a..12a21e74079ec 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -37,6 +37,9 @@ #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -59,11 +62,8 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -87,6 +87,15 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} +#define DEBUG_TYPE "selectiondag" + +static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { + DEBUG( + dbgs() << Msg; + V.getNode()->dump(G); + ); +} + //===----------------------------------------------------------------------===// // ConstantFPSDNode Class //===----------------------------------------------------------------------===// @@ -116,8 +125,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // ISD Namespace //===----------------------------------------------------------------------===// -bool 
ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal, - bool AllowShrink) { +bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { auto *BV = dyn_cast<BuildVectorSDNode>(N); if (!BV) return false; @@ -126,10 +134,9 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal, unsigned SplatBitSize; bool HasUndefs; unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); - unsigned MinSplatBits = AllowShrink ? 0 : EltSize; return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, - MinSplatBits) && - EltSize >= SplatBitSize; + EltSize) && + EltSize == SplatBitSize; } // FIXME: AllOnes and AllZeros duplicate a lot of code. Could these be @@ -895,12 +902,14 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) } void SelectionDAG::init(MachineFunction &NewMF, - OptimizationRemarkEmitter &NewORE) { + OptimizationRemarkEmitter &NewORE, + Pass *PassPtr) { MF = &NewMF; + SDAGISelPass = PassPtr; ORE = &NewORE; TLI = getSubtarget().getTargetLowering(); TSI = getSubtarget().getSelectionDAGInfo(); - Context = &MF->getFunction()->getContext(); + Context = &MF->getFunction().getContext(); } SelectionDAG::~SelectionDAG() { @@ -1018,7 +1027,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); - if (Op.getValueType() == VT) return Op; + if (Op.getValueType().getScalarType() == VT) return Op; unsigned BitWidth = Op.getScalarValueSizeInBits(); APInt Imm = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); @@ -1156,7 +1165,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, SmallVector<SDValue, 8> Ops; for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); - return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); + + SDValue V = getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops)); + return V; } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && @@ -1176,11 +1187,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); } SDValue Result(N, 0); if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); + return Result; } @@ -1222,6 +1235,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, SDValue Result(N, 0); if (VT.isVector()) Result = getSplatBuildVector(VT, DL, Result); + NewSDValueDbgMsg(Result, "Creating fp constant: ", this); return Result; } @@ -1317,7 +1331,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = MF->getFunction()->optForSize() + Alignment = MF->getFunction().optForSize() ? getDataLayout().getABITypeAlignment(C->getType()) : getDataLayout().getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; @@ -1471,7 +1485,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, // Validate that all indices in Mask are within the range of the elements // input to the shuffle. 
int NElts = Mask.size(); - assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) && + assert(llvm::all_of(Mask, + [&](int M) { return M < (NElts * 2) && M >= -1; }) && "Index out of range"); // Copy the mask so we can do any needed cleanup. @@ -1622,7 +1637,9 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { @@ -1665,15 +1682,20 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { SDValue SelectionDAG::getEHLabel(const SDLoc &dl, SDValue Root, MCSymbol *Label) { + return getLabelNode(ISD::EH_LABEL, dl, Root, Label); +} + +SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, + SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; - AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); + AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), Ops); ID.AddPointer(Label); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<EHLabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); + auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label); createOperands(N, Ops); CSEMap.InsertNode(N, IP); @@ -1955,6 +1977,69 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, return SDValue(); } +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by Mask are used. +SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) { + switch (V.getOpcode()) { + default: + break; + case ISD::Constant: { + const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + assert(CV && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & Mask; + if (NewVal != CVal) + return getConstant(NewVal, SDLoc(V), V.getValueType()); + break; + } + case ISD::OR: + case ISD::XOR: + // If the LHS or RHS don't contribute bits to the or, drop them. + if (MaskedValueIsZero(V.getOperand(0), Mask)) + return V.getOperand(1); + if (MaskedValueIsZero(V.getOperand(1), Mask)) + return V.getOperand(0); + break; + case ISD::SRL: + // Only look at single-use SRLs. + if (!V.getNode()->hasOneUse()) + break; + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { + // See if we can recursively simplify the LHS. + unsigned Amt = RHSC->getZExtValue(); + + // Watch out for shift count overflow though. + if (Amt >= Mask.getBitWidth()) + break; + APInt NewMask = Mask << Amt; + if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) + return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, + V.getOperand(1)); + } + break; + case ISD::AND: { + // X & -1 -> X (ignoring bits which aren't demanded). + ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1)); + if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue())) + return V.getOperand(0); + break; + } + case ISD::ANY_EXTEND: { + SDValue Src = V.getOperand(0); + unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); + // Being conservative here - only peek through if we only demand bits in the + // non-extended source (even though the extended bits are technically undef). 
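// The ANY_EXTEND guard above with plain integers, as a hedged sketch
// (demandedFitsInSource is hypothetical, not a SelectionDAG API): an
// any_extend from i8 only defines the low 8 bits, so peeking through it is
// safe only when no demanded bit lies at or above bit 8.
#include <cstdint>

bool demandedFitsInSource(uint32_t DemandedMask, unsigned SrcBits) {
  // Mirrors the Mask.getActiveBits() <= SrcBitWidth check.
  return SrcBits >= 32 || (DemandedMask >> SrcBits) == 0;
}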
+ if (Mask.getActiveBits() > SrcBitWidth) + break; + APInt SrcMask = Mask.trunc(SrcBitWidth); + if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask)) + return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); + break; + } + } + return SDValue(); +} + /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { @@ -1972,6 +2057,30 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, return Mask.isSubsetOf(Known.Zero); } +/// Helper function that checks to see if a node is a constant or a +/// build vector of splat constants at least within the demanded elts. +static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N, + const APInt &DemandedElts) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + if (N.getOpcode() != ISD::BUILD_VECTOR) + return nullptr; + EVT VT = N.getValueType(); + ConstantSDNode *Cst = nullptr; + unsigned NumElts = VT.getVectorNumElements(); + assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size"); + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i]) + continue; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i)); + if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) || + C->getValueType(0) != VT.getScalarType()) + return nullptr; + Cst = C; + } + return Cst; +} + /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. static const APInt *getValidShiftAmountConstant(SDValue V) { @@ -2005,6 +2114,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, unsigned BitWidth = Op.getScalarValueSizeInBits(); Known = KnownBits(BitWidth); // Don't know anything. + + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + // We know all of the bits for a constant! + Known.One = C->getAPIntValue(); + Known.Zero = ~Known.One; + return; + } + if (auto *C = dyn_cast<ConstantFPSDNode>(Op)) { + // We know all of the bits for a constant fp! + Known.One = C->getValueAPF().bitcastToAPInt(); + Known.Zero = ~Known.One; + return; + } + if (Depth == 6) return; // Limit search depth. @@ -2016,11 +2139,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, unsigned Opcode = Op.getOpcode(); switch (Opcode) { - case ISD::Constant: - // We know all of the bits for a constant! - Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); - Known.Zero = ~Known.One; - break; case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. assert(NumElts == Op.getValueType().getVectorNumElements() && @@ -2045,7 +2163,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; } break; @@ -2083,7 +2201,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; if (!!DemandedRHS) { SDValue RHS = Op.getOperand(1); @@ -2109,11 +2227,45 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero &= Known2.Zero; } // If we don't know any bits, early out. 
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
 break;
 }
 break;
 }
+ case ISD::INSERT_SUBVECTOR: {
+ // If we know the element index, demand any elements from the subvector and
+ // the remainder from the src it's inserted into, otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) {
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ if (!!DemandedSubElts) {
+ computeKnownBits(Sub, Known, DemandedSubElts, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ }
+ APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts);
+ APInt DemandedSrcElts = DemandedElts & ~SubMask;
+ if (!!DemandedSrcElts) {
+ computeKnownBits(Src, Known2, DemandedSrcElts, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ } else {
+ computeKnownBits(Sub, Known, Depth + 1);
+ if (Known.isUnknown())
+ break; // early-out.
+ computeKnownBits(Src, Known2, Depth + 1);
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ }
+ break;
+ }
 case ISD::EXTRACT_SUBVECTOR: {
 // If we know the element index, just demand that subvector elements,
 // otherwise demand them all.
@@ -2132,10 +2284,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
 }
 case ISD::BITCAST: {
 SDValue N0 = Op.getOperand(0);
- unsigned SubBitWidth = N0.getScalarValueSizeInBits();
+ EVT SubVT = N0.getValueType();
+ unsigned SubBitWidth = SubVT.getScalarSizeInBits();
- // Ignore bitcasts from floating point.
- if (!N0.getValueType().isInteger())
+ // Ignore bitcasts from unsupported types.
+ if (!(SubVT.isInteger() || SubVT.isFloatingPoint()))
 break;
 // Fast handling of 'identity' bitcasts.
@@ -2193,7 +2346,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
 Known.One &= Known2.One.lshr(Offset).trunc(BitWidth);
 Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth);
 // If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
 break;
 }
 }
@@ -2264,22 +2417,23 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known,
 break;
 }
 case ISD::SELECT:
- computeKnownBits(Op.getOperand(2), Known, Depth+1);
+ case ISD::VSELECT:
+ computeKnownBits(Op.getOperand(2), Known, DemandedElts, Depth+1);
 // If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
 break;
- computeKnownBits(Op.getOperand(1), Known2, Depth+1);
+ computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth+1);
 // Only known if known in both the LHS and RHS.
 Known.One &= Known2.One;
 Known.Zero &= Known2.Zero;
 break;
 case ISD::SELECT_CC:
- computeKnownBits(Op.getOperand(3), Known, Depth+1);
+ computeKnownBits(Op.getOperand(3), Known, DemandedElts, Depth+1);
 // If we don't know any bits, early out.
- if (!Known.One && !Known.Zero)
+ if (Known.isUnknown())
 break;
- computeKnownBits(Op.getOperand(2), Known2, Depth+1);
+ computeKnownBits(Op.getOperand(2), Known2, DemandedElts, Depth+1);
 // Only known if known in both the LHS and RHS.
Known.One &= Known2.One; @@ -2308,35 +2462,49 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero <<= *ShAmt; - Known.One <<= *ShAmt; + unsigned Shift = ShAmt->getZExtValue(); + Known.Zero <<= Shift; + Known.One <<= Shift; // Low bits are known zero. - Known.Zero.setLowBits(ShAmt->getZExtValue()); + Known.Zero.setLowBits(Shift); } break; case ISD::SRL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero.lshrInPlace(*ShAmt); - Known.One.lshrInPlace(*ShAmt); + unsigned Shift = ShAmt->getZExtValue(); + Known.Zero.lshrInPlace(Shift); + Known.One.lshrInPlace(Shift); // High bits are known zero. - Known.Zero.setHighBits(ShAmt->getZExtValue()); + Known.Zero.setHighBits(Shift); + } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) { + // If the shift amount is a vector of constants see if we can bound + // the number of upper zero bits. + unsigned ShiftAmountMin = BitWidth; + for (unsigned i = 0; i != BV->getNumOperands(); ++i) { + if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) { + const APInt &ShAmt = C->getAPIntValue(); + if (ShAmt.ult(BitWidth)) { + ShiftAmountMin = std::min<unsigned>(ShiftAmountMin, + ShAmt.getZExtValue()); + continue; + } + } + // Don't know anything. + ShiftAmountMin = 0; + break; + } + + Known.Zero.setHighBits(ShiftAmountMin); } break; case ISD::SRA: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - Known.Zero.lshrInPlace(*ShAmt); - Known.One.lshrInPlace(*ShAmt); - // If we know the value of the sign bit, then we know it is copied across - // the high bits by the shift amount. - APInt SignMask = APInt::getSignMask(BitWidth); - SignMask.lshrInPlace(*ShAmt); // Adjust to where it is now in the mask. - if (Known.Zero.intersects(SignMask)) { - Known.Zero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. - } else if (Known.One.intersects(SignMask)) { - Known.One.setHighBits(ShAmt->getZExtValue()); // New bits are known one. - } + unsigned Shift = ShAmt->getZExtValue(); + // Sign extend known zero/one bit (else is unknown). 
+ Known.Zero.ashrInPlace(Shift); + Known.One.ashrInPlace(Shift); } break; case ISD::SIGN_EXTEND_INREG: { @@ -2414,49 +2582,33 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::ZERO_EXTEND_VECTOR_INREG: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - Known = Known.trunc(InBits); - computeKnownBits(Op.getOperand(0), Known, - DemandedElts.zext(InVT.getVectorNumElements()), - Depth + 1); + APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); + computeKnownBits(Op.getOperand(0), Known, InDemandedElts, Depth + 1); Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InBits); + Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); break; } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); Known = Known.zext(BitWidth); - Known.Zero.setBitsFrom(InBits); + Known.Zero.setBitsFrom(InVT.getScalarSizeInBits()); break; } // TODO ISD::SIGN_EXTEND_VECTOR_INREG case ISD::SIGN_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - - Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); - // If the sign bit is known to be zero or one, then sext will extend // it to the top bits, else it will just zext. Known = Known.sext(BitWidth); break; } case ISD::ANY_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - Known = Known.trunc(InBits); computeKnownBits(Op.getOperand(0), Known, Depth+1); Known = Known.zext(BitWidth); break; } case ISD::TRUNCATE: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - Known = Known.zext(InBits); computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); Known = Known.trunc(BitWidth); break; @@ -2755,7 +2907,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); // If we don't know any bits, early out. - if (!Known.One && !Known.Zero) + if (Known.isUnknown()) break; computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); Known.Zero &= Known2.Zero; @@ -2764,11 +2916,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::FrameIndex: case ISD::TargetFrameIndex: - if (unsigned Align = InferPtrAlignment(Op)) { - // The low bits are known zero if the pointer is aligned. 
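The alignment fact that the removed frame-index code exploited (and that computeKnownBitsForFrameIndex now encapsulates) is simple: a pointer aligned to a power-of-two Align has log2(Align) trailing zero bits. A plain C++ sketch of just that fact, independent of the DAG API:

#include <cassert>
#include <cstdint>

// If P is Align-byte aligned (Align a power of two), its log2(Align)
// low bits are zero -- the property behind
// Known.Zero.setLowBits(Log2_32(Align)).
unsigned knownLowZeroBits(uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "expected power of two");
  unsigned Log2 = 0;
  while ((Align >>= 1) != 0)
    ++Log2;
  return Log2; // e.g. Align == 16 -> 4 trailing zero bits known zero
}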
- Known.Zero.setLowBits(Log2_32(Align)); - break; - } + TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); break; default: @@ -2783,7 +2931,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } - assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?"); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); } SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, @@ -2873,12 +3021,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned Depth) const { EVT VT = Op.getValueType(); - assert(VT.isInteger() && "Invalid VT!"); + assert((VT.isInteger() || VT.isFloatingPoint()) && "Invalid VT!"); unsigned VTBits = VT.getScalarSizeInBits(); unsigned NumElts = DemandedElts.getBitWidth(); unsigned Tmp, Tmp2; unsigned FirstAnswer = 1; + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { + const APInt &Val = C->getAPIntValue(); + return Val.getNumSignBits(); + } + if (Depth == 6) return 1; // Limit search depth. @@ -2894,11 +3047,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); return VTBits-Tmp; - case ISD::Constant: { - const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); - return Val.getNumSignBits(); - } - case ISD::BUILD_VECTOR: Tmp = VTBits; for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { @@ -2952,32 +3100,63 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } + case ISD::BITCAST: { + SDValue N0 = Op.getOperand(0); + EVT SrcVT = N0.getValueType(); + unsigned SrcBits = SrcVT.getScalarSizeInBits(); + + // Ignore bitcasts from unsupported types.. + if (!(SrcVT.isInteger() || SrcVT.isFloatingPoint())) + break; + + // Fast handling of 'identity' bitcasts. + if (VTBits == SrcBits) + return ComputeNumSignBits(N0, DemandedElts, Depth + 1); + + // Bitcast 'large element' scalar/vector to 'small element' vector. + // TODO: Handle cases other than 'sign splat' when we have a use case. + // Requires handling of DemandedElts and Endianness. + if ((SrcBits % VTBits) == 0) { + assert(Op.getValueType().isVector() && "Expected bitcast to vector"); + Tmp = ComputeNumSignBits(N0, Depth + 1); + if (Tmp == SrcBits) + return VTBits; + } + break; + } + case ISD::SIGN_EXTEND: - case ISD::SIGN_EXTEND_VECTOR_INREG: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); - return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; - + return ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: // Max of the input and what this extends. Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits(); Tmp = VTBits-Tmp+1; - - Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); return std::max(Tmp, Tmp2); + case ISD::SIGN_EXTEND_VECTOR_INREG: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); + Tmp = VTBits - SrcVT.getScalarSizeInBits(); + return ComputeNumSignBits(Src, DemandedSrcElts, Depth+1) + Tmp; + } case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); // SRA X, C -> adds C sign bits. 
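The "SRA X, C -> adds C sign bits" rule can be checked with a worked example in plain C++ (numSignBits and signBitsAfterSra are illustrative helpers, not ComputeNumSignBits; the code assumes arithmetic right shift of signed values, as C++20 guarantees):

#include <algorithm>
#include <cstdint>

// Count how many high bits equal the sign bit (always at least 1).
unsigned numSignBits(int64_t V) {
  unsigned N = 1;
  while (N < 64 && ((V >> (63 - N)) & 1) == ((V >> 63) & 1))
    ++N;
  return N; // e.g. numSignBits(1) == 63, numSignBits(-1) == 64
}

// An arithmetic shift right by C replicates the sign bit C more times,
// capped at the bit width.
unsigned signBitsAfterSra(int64_t V, unsigned C) {
  return std::min<unsigned>(64, numSignBits(V) + C);
}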
- if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { + if (ConstantSDNode *C = + isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { APInt ShiftVal = C->getAPIntValue(); ShiftVal += Tmp; Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); } return Tmp; case ISD::SHL: - if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { + if (ConstantSDNode *C = + isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) { // shl destroys sign bits. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); if (C->getAPIntValue().uge(VTBits) || // Bad shift. C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out. return Tmp - C->getZExtValue(); @@ -2987,9 +3166,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::OR: case ISD::XOR: // NOT is handled here. // Logical binary ops preserve the number of sign bits at the worst. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); if (Tmp != 1) { - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); FirstAnswer = std::min(Tmp, Tmp2); // We computed what we know about the sign bits as our first // answer. Now proceed to the generic code that uses @@ -2998,15 +3177,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; case ISD::SELECT: - Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); + case ISD::VSELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth+1); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); + Tmp2 = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); return std::min(Tmp, Tmp2); case ISD::SELECT_CC: - Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(2), DemandedElts, Depth+1); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1); + Tmp2 = ComputeNumSignBits(Op.getOperand(3), DemandedElts, Depth+1); return std::min(Tmp, Tmp2); + case ISD::SMIN: case ISD::SMAX: case ISD::UMIN: @@ -3041,16 +3222,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::ROTL: case ISD::ROTR: if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned RotAmt = C->getZExtValue() & (VTBits-1); + unsigned RotAmt = C->getAPIntValue().urem(VTBits); // Handle rotate right by N like a rotate left by 32-N. if (Op.getOpcode() == ISD::ROTR) - RotAmt = (VTBits-RotAmt) & (VTBits-1); + RotAmt = (VTBits - RotAmt) % VTBits; // If we aren't rotating out all of the known-in sign bits, return the // number that are left. This handles rotl(sext(x), 1) for example. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp > RotAmt+1) return Tmp-RotAmt; + if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt); } break; case ISD::ADD: @@ -3391,7 +3572,9 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT, ? DAG.getZExtOrTrunc(Op, DL, SVT) : DAG.getSExtOrTrunc(Op, DL, SVT); - return DAG.getBuildVector(VT, DL, Elts); + SDValue V = DAG.getBuildVector(VT, DL, Elts); + NewSDValueDbgMsg(V, "New node fold concat vectors: ", &DAG); + return V; } /// Gets or creates the specified node. 
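The ROTL/ROTR change above replaces masking with VTBits-1 by urem, presumably because the mask trick is only correct when the bit width is a power of two; modulo works for any width. A minimal sketch of the normalization and of treating rotate-right as rotate-left:

#include <cstdint>

// Normalize a rotate amount into [0, Width). Masking with Width - 1 is
// only valid for power-of-two widths; modulo handles any width.
unsigned normalizeRotAmt(uint64_t Amt, unsigned Width) {
  return static_cast<unsigned>(Amt % Width);
}

// A rotate-right by R over Width bits equals a rotate-left by
// Width - R, reduced modulo Width so that R == 0 stays 0.
unsigned rotrAsRotl(uint64_t R, unsigned Width) {
  return (Width - normalizeRotAmt(R, Width)) % Width;
}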
@@ -3407,7 +3590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -3768,7 +3953,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, @@ -3906,18 +4093,31 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); EVT SVT = VT.getScalarType(); + EVT LegalSVT = SVT; + if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } SmallVector<SDValue, 4> Outputs; for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { SDValue V1 = BV1->getOperand(I); SDValue V2 = BV2->getOperand(I); - // Avoid BUILD_VECTOR nodes that perform implicit truncation. - // FIXME: This is valid and could be handled by truncation. + if (SVT.isInteger()) { + if (V1->getValueType(0).bitsGT(SVT)) + V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); + if (V2->getValueType(0).bitsGT(SVT)) + V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); + } + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) return SDValue(); // Fold one vector element. SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); // Scalar folding only succeeded if the result is a constant or UNDEF. if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && @@ -3936,6 +4136,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, return getBuildVector(VT, SDLoc(), Outputs); } +// TODO: Merge with FoldConstantArithmetic SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, @@ -4027,7 +4228,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, ScalarResults.push_back(ScalarResult); } - return getBuildVector(VT, DL, ScalarResults); + SDValue V = getBuildVector(VT, DL, ScalarResults); + NewSDValueDbgMsg(V, "New node fold constant vector: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -4297,6 +4500,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); } } + + // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed + // when vector types are scalarized and v1iX is legal. 
+ // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx) + if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getValueType().getVectorNumElements() == 1) { + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), + N1.getOperand(1)); + } break; case ISD::EXTRACT_ELEMENT: assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); @@ -4518,7 +4730,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -4553,8 +4767,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; // Vector constant folding. SDValue Ops[] = {N1, N2, N3}; - if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) { + NewSDValueDbgMsg(V, "New node vector constant folding: ", this); return V; + } break; } case ISD::SELECT: @@ -4626,7 +4842,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, @@ -4882,8 +5100,8 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { // On Darwin, -Os means optimize for size without hurting performance, so // only really optimize for size when -Oz (MinSize) is used. if (MF.getTarget().getTargetTriple().isOSDarwin()) - return MF.getFunction()->optForMinSize(); - return MF.getFunction()->optForSize(); + return MF.getFunction().optForMinSize(); + return MF.getFunction().optForSize(); } static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, @@ -5558,21 +5776,15 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, - bool ReadMem, bool WriteMem, unsigned Size) { + EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, + MachineMemOperand::Flags Flags, unsigned Size) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); - MachineFunction &MF = getMachineFunction(); - auto Flags = MachineMemOperand::MONone; - if (WriteMem) - Flags |= MachineMemOperand::MOStore; - if (ReadMem) - Flags |= MachineMemOperand::MOLoad; - if (Vol) - Flags |= MachineMemOperand::MOVolatile; if (!Size) Size = MemVT.getStoreSize(); + + MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); @@ -5597,6 +5809,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); + ID.AddInteger(getSyntheticNodeSubclassData<MemIntrinsicSDNode>( + Opcode, dl.getIROrder(), VTList, MemVT, MMO)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -5622,7 +5836,8 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, /// MachinePointerInfo record from it. 
This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, +static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, + SelectionDAG &DAG, SDValue Ptr, int64_t Offset = 0) { // If this is FI+Offset, we can model it. if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) @@ -5633,7 +5848,7 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, if (Ptr.getOpcode() != ISD::ADD || !isa<ConstantSDNode>(Ptr.getOperand(1)) || !isa<FrameIndexSDNode>(Ptr.getOperand(0))) - return MachinePointerInfo(); + return Info; int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); return MachinePointerInfo::getFixedStack( @@ -5645,14 +5860,15 @@ static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, /// MachinePointerInfo record from it. This is particularly useful because the /// code generator has many cases where it doesn't bother passing in a /// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". -static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, +static MachinePointerInfo InferPointerInfo(const MachinePointerInfo &Info, + SelectionDAG &DAG, SDValue Ptr, SDValue OffsetOp) { // If the 'Offset' value isn't a constant, we can't handle this. if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) - return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); + return InferPointerInfo(Info, DAG, Ptr, OffsetNode->getSExtValue()); if (OffsetOp.isUndef()) - return InferPointerInfo(DAG, Ptr); - return MachinePointerInfo(); + return InferPointerInfo(Info, DAG, Ptr); + return Info; } SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, @@ -5672,7 +5888,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
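The InferPointerInfo change threads the caller's original MachinePointerInfo through as the fallback, instead of dropping to an empty record when the pointer is not a recognizable FrameIndex(+constant). A toy model of that fallback shape (PtrInfo and inferPtrInfo are illustrative types, not the LLVM classes; requires C++17 for std::optional):

#include <cstdint>
#include <optional>

struct PtrInfo { int FrameIdx = -1; int64_t Offset = 0; };

PtrInfo inferPtrInfo(const PtrInfo &Orig, std::optional<int> FI,
                     int64_t Off) {
  if (FI) // recognizable FI+Off pattern: build precise info
    return PtrInfo{*FI, Off};
  return Orig; // unrecognized: preserve what the caller already knew
}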
if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr, Offset); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -5791,7 +6007,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -5841,7 +6057,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, assert((MMOFlags & MachineMemOperand::MOLoad) == 0); if (PtrInfo.V.isNull()) - PtrInfo = InferPointerInfo(*this, Ptr); + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand( @@ -6118,7 +6334,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, @@ -6171,7 +6389,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, @@ -6580,14 +6800,16 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { unsigned OrigOpc = Node->getOpcode(); unsigned NewOpc; bool IsUnary = false; + bool IsTernary = false; switch (OrigOpc) { - default: + default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); case ISD::STRICT_FADD: NewOpc = ISD::FADD; break; case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break; case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break; case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break; case ISD::STRICT_FREM: NewOpc = ISD::FREM; break; + case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break; case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break; case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break; case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break; @@ -6614,10 +6836,14 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { SDNode *Res = nullptr; if (IsUnary) Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) }); + else if (IsTernary) + Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), + Node->getOperand(2), + Node->getOperand(3)}); else Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1), Node->getOperand(2) }); - + // MorphNodeTo can operate in two ways: if an existing node with the // specified operands exists, it can just return it. Otherwise, it // updates the node in place to have the requested operands. @@ -6630,7 +6856,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { RemoveDeadNode(Node); } - return Res; + return Res; } /// getMachineNode - These are used for target selectors to create a new node @@ -6794,32 +7020,125 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, /// getDbgValue - Creates a SDDbgValue node. 
/// /// SDNode -SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, - unsigned R, bool IsIndirect, uint64_t Off, +SDDbgValue *SelectionDAG::getDbgValue(DIVariable *Var, DIExpression *Expr, + SDNode *N, unsigned R, bool IsIndirect, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); return new (DbgInfo->getAlloc()) - SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); + SDDbgValue(Var, Expr, N, R, IsIndirect, DL, O); } /// Constant -SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, - const Value *C, uint64_t Off, +SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, + DIExpression *Expr, + const Value *C, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, DL, O); } /// FrameIndex -SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, - unsigned FI, uint64_t Off, +SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, + DIExpression *Expr, unsigned FI, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O); +} + +void SelectionDAG::transferDbgValues(SDValue From, SDValue To, + unsigned OffsetInBits, unsigned SizeInBits, + bool InvalidateDbg) { + SDNode *FromNode = From.getNode(); + SDNode *ToNode = To.getNode(); + assert(FromNode && ToNode && "Can't modify dbg values"); + + // PR35338 + // TODO: assert(From != To && "Redundant dbg value transfer"); + // TODO: assert(FromNode != ToNode && "Intranode dbg value transfer"); + if (From == To || FromNode == ToNode) + return; + + if (!FromNode->getHasDebugValue()) + return; + + SmallVector<SDDbgValue *, 2> ClonedDVs; + for (SDDbgValue *Dbg : GetDbgValues(FromNode)) { + if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated()) + continue; + + // TODO: assert(!Dbg->isInvalidated() && "Transfer of invalid dbg value"); + + // Just transfer the dbg value attached to From. + if (Dbg->getResNo() != From.getResNo()) + continue; + + DIVariable *Var = Dbg->getVariable(); + auto *Expr = Dbg->getExpression(); + // If a fragment is requested, update the expression. + if (SizeInBits) { + // When splitting a larger (e.g., sign-extended) value whose + // lower bits are described with an SDDbgValue, do not attempt + // to transfer the SDDbgValue to the upper bits. + if (auto FI = Expr->getFragmentInfo()) + if (OffsetInBits + SizeInBits > FI->SizeInBits) + continue; + auto Fragment = DIExpression::createFragmentExpression(Expr, OffsetInBits, + SizeInBits); + if (!Fragment) + continue; + Expr = *Fragment; + } + // Clone the SDDbgValue and move it to To. 
+ SDDbgValue *Clone = + getDbgValue(Var, Expr, ToNode, To.getResNo(), Dbg->isIndirect(), + Dbg->getDebugLoc(), Dbg->getOrder()); + ClonedDVs.push_back(Clone); + + if (InvalidateDbg) + Dbg->setIsInvalidated(); + } + + for (SDDbgValue *Dbg : ClonedDVs) + AddDbgValue(Dbg, ToNode, false); +} + +void SelectionDAG::salvageDebugInfo(SDNode &N) { + if (!N.getHasDebugValue()) + return; + for (auto DV : GetDbgValues(&N)) { + if (DV->isInvalidated()) + continue; + switch (N.getOpcode()) { + default: + break; + case ISD::ADD: + SDValue N0 = N.getOperand(0); + SDValue N1 = N.getOperand(1); + if (!isConstantIntBuildVectorOrConstantInt(N0) && + isConstantIntBuildVectorOrConstantInt(N1)) { + uint64_t Offset = N.getConstantOperandVal(1); + // Rewrite an ADD constant node into a DIExpression. Since we are + // performing arithmetic to compute the variable's *value* in the + // DIExpression, we need to mark the expression with a + // DW_OP_stack_value. + auto *DIExpr = DV->getExpression(); + DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, + DIExpression::NoDeref, + DIExpression::WithStackValue); + SDDbgValue *Clone = + getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(), + DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); + DV->setIsInvalidated(); + AddDbgValue(Clone, N0.getNode(), false); + DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); + dbgs() << " into " << *DIExpr << '\n'); + } + } + } } namespace { @@ -6859,7 +7178,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { assert(From != To.getNode() && "Cannot replace uses of with self"); // Preserve Debug Values - TransferDbgValues(FromN, To); + transferDbgValues(FromN, To); // Iterate over all the existing uses of From. New uses will be added // to the beginning of the use list, which we avoid visiting. @@ -6918,7 +7237,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) if (From->hasAnyUseOfValue(i)) { assert((i < To->getNumValues()) && "Invalid To location"); - TransferDbgValues(SDValue(From, i), SDValue(To, i)); + transferDbgValues(SDValue(From, i), SDValue(To, i)); } // Iterate over just the existing users of From. See the comments in @@ -6962,7 +7281,7 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { // Preserve Debug Info. for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) - TransferDbgValues(SDValue(From, i), *To); + transferDbgValues(SDValue(From, i), *To); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -7009,7 +7328,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ } // Preserve Debug Info. - TransferDbgValues(From, To); + transferDbgValues(From, To); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -7087,7 +7406,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, if (Num == 1) return ReplaceAllUsesOfValueWith(*From, *To); - TransferDbgValues(*From, *To); + transferDbgValues(*From, *To); // Read up all the uses and make records of them. This helps // processing new uses that are introduced during the @@ -7236,35 +7555,6 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { DbgInfo->add(DB, SD, isParameter); } -/// TransferDbgValues - Transfer SDDbgValues. Called in replace nodes. 
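The salvage step above rewrites a dbg.value of (add X, C) into a dbg.value of X whose expression re-applies +C and is marked DW_OP_stack_value, since the expression now computes the variable's value rather than its address. A rough sketch of the expression rewrite with raw DWARF opcodes (the real DIExpression::prepend also handles deref flags and fragments; the 0x23/0x9f constants are the standard DW_OP_plus_uconst and DW_OP_stack_value encodings, shown here for illustration):

#include <cstdint>
#include <vector>

std::vector<uint64_t> salvageAdd(const std::vector<uint64_t> &Expr,
                                 uint64_t Offset) {
  std::vector<uint64_t> Out;
  Out.push_back(0x23);   // DW_OP_plus_uconst: re-apply the folded +Offset
  Out.push_back(Offset); // ...to the incoming value first,
  Out.insert(Out.end(), Expr.begin(), Expr.end()); // then the old ops,
  Out.push_back(0x9f);   // DW_OP_stack_value: the result is a value,
                         // not a memory location
  return Out;
}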
-void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { - if (From == To || !From.getNode()->getHasDebugValue()) - return; - SDNode *FromNode = From.getNode(); - SDNode *ToNode = To.getNode(); - ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode); - SmallVector<SDDbgValue *, 2> ClonedDVs; - for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end(); - I != E; ++I) { - SDDbgValue *Dbg = *I; - // Only add Dbgvalues attached to same ResNo. - if (Dbg->getKind() == SDDbgValue::SDNODE && - Dbg->getSDNode() == From.getNode() && - Dbg->getResNo() == From.getResNo() && !Dbg->isInvalidated()) { - assert(FromNode != ToNode && - "Should not transfer Debug Values intranode"); - SDDbgValue *Clone = - getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode, - To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(), - Dbg->getDebugLoc(), Dbg->getOrder()); - ClonedDVs.push_back(Clone); - Dbg->setIsInvalidated(); - } - } - for (SDDbgValue *I : ClonedDVs) - AddDbgValue(I, ToNode, false); -} - SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 0d69441ebb7f7..544da362be698 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -1,5 +1,4 @@ -//===-- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp ------- DAG Address -//Analysis ---*- C++ -*-===// +//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==// // // The LLVM Compiler Infrastructure // @@ -7,15 +6,18 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Support/Casting.h" +#include <cstdint> -namespace llvm { +using namespace llvm; bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG, int64_t &Off) { @@ -55,7 +57,7 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, /// Parses tree in Ptr for base, index, offset addresses. BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { // (((B + I*M) + c)) + c ... 
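BaseIndexOffset::match peels the "(((B + I*M) + c) + c)" shape noted in the comment: trailing constants accumulate into the offset, a multiply becomes the scaled index, and whatever remains is the base. A toy recursive decomposition over a hand-rolled expression node (Node, Decomp, and match are illustrative; the tree is assumed well-formed, with L/R set on Add/Mul nodes):

#include <cstdint>

struct Node {
  enum Kind { Base, Const, Add, Mul } K;
  int64_t Val = 0; // payload for Const nodes
  const Node *L = nullptr, *R = nullptr;
};

struct Decomp { const Node *Base; const Node *Index; int64_t Offset; };

Decomp match(const Node *N) {
  Decomp D{nullptr, nullptr, 0};
  // Peel trailing "+ c" layers, accumulating the constant offset.
  while (N->K == Node::Add && N->R->K == Node::Const) {
    D.Offset += N->R->Val;
    N = N->L;
  }
  // An inner "B + I*M" splits into base and scaled index.
  if (N->K == Node::Add && N->R->K == Node::Mul) {
    D.Base = N->L;
    D.Index = N->R;
  } else {
    D.Base = N;
  }
  return D;
}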
- SDValue Base = Ptr; + SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr); SDValue Index = SDValue(); int64_t Offset = 0; bool IsIndexSignExt = false; @@ -112,4 +114,3 @@ BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { } return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt); } -} // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 127312076207c..71cb8cb78f6d3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// +//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===// // // The LLVM Compiler Infrastructure // @@ -12,63 +12,113 @@ //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" -#include "SDNodeDbgValue.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ADT/Twine.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/ISDOpcodes.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/ValueTypes.h" #include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/DebugLoc.h" #include 
"llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" -#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/InlineAsm.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCContext.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CodeGen.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <iterator> +#include <limits> +#include <numeric> +#include <tuple> #include <utility> +#include <vector> + using namespace llvm; #define DEBUG_TYPE "isel" @@ -78,11 +128,18 @@ using namespace llvm; static unsigned LimitFloatPrecision; static cl::opt<unsigned, true> -LimitFPPrecision("limit-float-precision", - cl::desc("Generate low-precision inline sequences " - "for some float libcalls"), - cl::location(LimitFloatPrecision), - cl::init(0)); + LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), cl::Hidden, + cl::init(0)); + +static cl::opt<unsigned> SwitchPeelThreshold( + "switch-peel-threshold", cl::Hidden, cl::init(66), + cl::desc("Set the case probability threshold for peeling the case from a " + "switch statement. A value greater than 100 will void this " + "optimization")); + // Limit the width of DAG chains. This is important in general to prevent // DAG-based analysis from blowing up. For example, alias analysis and // load clustering may not complete in reasonable time. It is difficult to @@ -101,7 +158,7 @@ static const unsigned MaxParallelChains = 64; // True if the Value passed requires ABI mangling as it is a parameter to a // function or a return value from a function which is not an intrinsic. 
-static bool isABIRegCopy(const Value * V) { +static bool isABIRegCopy(const Value *V) { const bool IsRetInst = V && isa<ReturnInst>(V); const bool IsCallInst = V && isa<CallInst>(V); const bool IsInLineAsm = @@ -554,7 +611,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, bool IsABIRegCopy) { - EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -600,7 +656,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, Val = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - } else { assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && "lossy conversion of vector to scalar type"); @@ -677,8 +732,6 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } } -RegsForValue::RegsForValue() { IsABIMangled = false; } - RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt, bool IsABIMangledValue) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), @@ -888,7 +941,24 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); - unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + if (Code == InlineAsm::Kind_Clobber) { + // Clobbers should always have a 1:1 mapping with registers, and may + // reference registers that have illegal (e.g. vector) types. Hence, we + // shouldn't try to apply any sort of splitting logic to them. + assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() && + "No 1:1 mapping from clobbers to regs?"); + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + (void)SP; + for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) { + Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I])); + assert( + (Regs[I] != SP || + DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) && + "If we clobbered the stack pointer, MFI should know about it."); + } + return; + } + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); MVT RegisterVT = RegVTs[Value]; @@ -896,11 +966,6 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, assert(Reg < Regs.size() && "Mismatch in # registers expected"); unsigned TheReg = Regs[Reg++]; Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); - - if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { - // If we clobbered the stack pointer, MFI should know about it. 
- assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()); - } } } } @@ -1025,12 +1090,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DIExpression *Expr = DI->getExpression(); assert(Variable->isValidLocationForIntrinsic(dl) && "Expected inlined-at fields to agree"); - uint64_t Offset = DI->getOffset(); SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, - Val)) { - SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { + SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1409,7 +1472,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, DL, + F->getReturnType()->getPointerTo( + DAG.getDataLayout().getAllocaAddrSpace()), PtrValueVTs); SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), @@ -1421,22 +1486,15 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); - // An aggregate return value cannot wrap around the address space, so - // offsets to its parts don't wrap either. - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - SmallVector<SDValue, 4> Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), - RetPtr.getValueType(), RetPtr, - DAG.getIntPtrConstant(Offsets[i], - getCurSDLoc()), - Flags); - Chains[i] = DAG.getStore(Chain, getCurSDLoc(), - SDValue(RetOp.getNode(), RetOp.getResNo() + i), - // FIXME: better loc info would be nice. - Add, MachinePointerInfo()); + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); + Chains[i] = DAG.getStore( + Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. + Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -1515,9 +1573,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { EVT(TLI.getPointerTy(DL)))); } - bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg(); CallingConv::ID CallConv = - DAG.getMachineFunction().getFunction()->getCallingConv(); + DAG.getMachineFunction().getFunction().getCallingConv(); Chain = DAG.getTargetLoweringInfo().LowerReturn( Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); @@ -1623,7 +1681,6 @@ static bool InBlock(const Value *V, const BasicBlock *BB) { /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. /// This function emits a branch and is used at the leaves of an OR or an /// AND operator tree. 
-/// void SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, @@ -1659,7 +1716,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, } CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, - TBB, FBB, CurBB, TProb, FProb); + TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SwitchCases.push_back(CB); return; } @@ -1668,7 +1725,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, // Create a CaseBlock record representing this branch. ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ; CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()), - nullptr, TBB, FBB, CurBB, TProb, FProb); + nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb); SwitchCases.push_back(CB); } @@ -1712,7 +1769,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, // If this node is not part of the or/and tree, emit it as a branch. if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || - BOpc != Opc || !BOp->hasOneUse() || + BOpc != unsigned(Opc) || !BOp->hasOneUse() || BOp->getParent() != CurBB->getBasicBlock() || !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { @@ -1867,7 +1924,6 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // je foo // cmp D, E // jle foo - // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && @@ -1907,7 +1963,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // Create a CaseBlock record representing this branch. CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), - nullptr, Succ0MBB, Succ1MBB, BrMBB); + nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc()); // Use visitSwitchCase to actually insert the fast branch sequence for this // cond branch. @@ -1920,7 +1976,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); - SDLoc dl = getCurSDLoc(); + SDLoc dl = CB.DL; // Build the setcc now. if (!CB.CmpMHS) { @@ -2054,7 +2110,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent()); + Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent()); MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain); if (Global) { @@ -2088,15 +2144,18 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue Guard; SDLoc dl = getCurSDLoc(); SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); - const Module &M = *ParentBB->getParent()->getFunction()->getParent(); + const Module &M = *ParentBB->getParent()->getFunction().getParent(); unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext())); // Generate code to load the content of the guard slot. 
- SDValue StackSlot = DAG.getLoad( + SDValue GuardVal = DAG.getLoad( PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align, MachineMemOperand::MOVolatile); + if (TLI.useStackGuardXorFP()) + GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl); + // Retrieve guard check function, nullptr if instrumentation is inlined. if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) { // The target provides a guard check function to validate the guard value. @@ -2108,7 +2167,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Node = StackSlot; + Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); if (Fn->hasAttribute(1, Attribute::AttrKind::InReg)) Entry.IsInReg = true; @@ -2141,7 +2200,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // Perform the comparison via a subtract/getsetcc. EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal); SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), @@ -2151,7 +2210,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, StackSlot.getOperand(0), + MVT::Other, GuardVal.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); // Otherwise branch to success MBB. SDValue Br = DAG.getNode(ISD::BR, dl, @@ -2530,7 +2589,7 @@ static bool isVectorReductionOp(const User *I) { case Instruction::FAdd: case Instruction::FMul: if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (FPOp->getFastMathFlags().unsafeAlgebra()) + if (FPOp->getFastMathFlags().isFast()) break; LLVM_FALLTHROUGH; default: @@ -2576,7 +2635,7 @@ static bool isVectorReductionOp(const User *I) { if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra()) + if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast()) return false; UsersToVisit.push_back(U); } else if (const ShuffleVectorInst *ShufInst = @@ -2670,7 +2729,7 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { Flags.setNoInfs(FMF.noInfs()); Flags.setNoNaNs(FMF.noNaNs()); Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + Flags.setUnsafeAlgebra(FMF.isFast()); SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); @@ -2779,7 +2838,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { // Check if the condition of the select has one use or two users that are both // selects with the same condition. 
static bool hasOnlySelectUsers(const Value *Cond) { - return all_of(Cond->users(), [](const Value *V) { + return llvm::all_of(Cond->users(), [](const Value *V) { return isa<SelectInst>(V); }); } @@ -3447,7 +3506,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -3468,17 +3527,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // an address inside an alloca. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); - AllocSize = DAG.getNode(ISD::ADD, dl, - AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(StackAlign - 1, dl), Flags); + AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags); // Mask out the low bits for alignment purposes. - AllocSize = DAG.getNode(ISD::AND, dl, - AllocSize.getValueType(), AllocSize, - DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), - dl)); + AllocSize = + DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr)); - SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; + SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)}; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); @@ -3807,18 +3864,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // // When the first GEP operand is a single pointer - it is the uniform base we // are looking for. If first operand of the GEP is a splat vector - we -// extract the spalt value and use it as a uniform base. +// extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. -// static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SelectionDAGBuilder* SDB) { - SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP || GEP->getNumOperands() > 2) + if (!GEP) return false; const Value *GEPPtr = GEP->getPointerOperand(); @@ -3827,7 +3882,15 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, else if (!(Ptr = getSplatValue(GEPPtr))) return false; - Value *IndexVal = GEP->getOperand(1); + unsigned FinalIndex = GEP->getNumOperands() - 1; + Value *IndexVal = GEP->getOperand(FinalIndex); + + // Ensure all the other indices are 0. + for (unsigned i = 1; i < FinalIndex; ++i) { + auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!C || !C->isZero()) + return false; + } // The operands of the GEP may be defined in another basic block. // In this case we'll not find nodes for the operands. @@ -3837,13 +3900,6 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); - // Suppress sign extension. 
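The relaxed getUniformBase check above now accepts GEPs with any number of indices, provided every index before the final one is a constant zero, so that only the last index varies per lane. A sketch of that predicate over toy types (ToyGEP and hasUniformBase are illustrative names; requires C++17 for std::optional):

#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

struct ToyGEP {
  bool PtrIsScalarOrSplat;
  std::vector<std::optional<int64_t>> Indices; // nullopt = non-constant
};

bool hasUniformBase(const ToyGEP &G) {
  if (!G.PtrIsScalarOrSplat || G.Indices.empty())
    return false;
  for (std::size_t i = 0; i + 1 < G.Indices.size(); ++i)
    if (!G.Indices[i] || *G.Indices[i] != 0)
      return false; // a varying or nonzero middle index breaks uniformity
  return true;      // the final index supplies the per-lane offsets
}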
- if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { - if (SDB->findValue(Sext->getOperand(0))) { - IndexVal = Sext->getOperand(0); - Index = SDB->getValue(IndexVal); - } - } if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); @@ -4082,7 +4138,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - if (I.getAlignment() < VT.getSizeInBits() / 8) + if (!TLI.supportsUnalignedAtomics() && + I.getAlignment() < VT.getStoreSize()) report_fatal_error("Cannot generate unaligned atomic load"); MachineMemOperand *MMO = @@ -4118,7 +4175,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlignment() < VT.getSizeInBits() / 8) + if (I.getAlignment() < VT.getStoreSize()) report_fatal_error("Cannot generate unaligned atomic store"); SDValue OutChain = @@ -4157,7 +4214,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, + DAG.getMachineFunction(), + Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || @@ -4183,11 +4242,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), - VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.vol, - Info.readMem, Info.writeMem, Info.size); + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, + Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, + Info.flags, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4370,7 +4428,6 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4469,7 +4526,6 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4567,7 +4623,6 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, /// limited-precision mode. static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { - // TODO: What fast-math-flags should be set on the floating-point nodes? if (Op.getValueType() == MVT::f32 && @@ -4695,7 +4750,6 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); } - /// ExpandPowI - Expand a llvm.powi intrinsic. 
@@ -4082,7 +4138,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
 
-  if (I.getAlignment() < VT.getSizeInBits() / 8)
+  if (!TLI.supportsUnalignedAtomics() &&
+      I.getAlignment() < VT.getStoreSize())
     report_fatal_error("Cannot generate unaligned atomic load");
 
   MachineMemOperand *MMO =
@@ -4118,7 +4175,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
   EVT VT = TLI.getValueType(DAG.getDataLayout(),
                             I.getValueOperand()->getType());
 
-  if (I.getAlignment() < VT.getSizeInBits() / 8)
+  if (I.getAlignment() < VT.getStoreSize())
     report_fatal_error("Cannot generate unaligned atomic store");
 
   SDValue OutChain =
@@ -4157,7 +4214,9 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   // Info is set by getTgtMemIntrinsic
   TargetLowering::IntrinsicInfo Info;
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
+  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
+                                               DAG.getMachineFunction(),
+                                               Intrinsic);
 
   // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
   if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
@@ -4183,11 +4242,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
   SDValue Result;
   if (IsTgtIntrinsic) {
     // This is target intrinsic that touches memory
-    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
-                                     VTs, Ops, Info.memVT,
-                                     MachinePointerInfo(Info.ptrVal, Info.offset),
-                                     Info.align, Info.vol,
-                                     Info.readMem, Info.writeMem, Info.size);
+    Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
+                                     Ops, Info.memVT,
+                                     MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
+                                     Info.flags, Info.size);
   } else if (!HasChain) {
     Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
   } else if (!I.getType()->isVoidTy()) {
@@ -4370,7 +4428,6 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
-
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -4469,7 +4526,6 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                           const TargetLowering &TLI) {
-
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -4567,7 +4623,6 @@ static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
 /// limited-precision mode.
 static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
                            const TargetLowering &TLI) {
-
   // TODO: What fast-math-flags should be set on the floating-point nodes?
 
   if (Op.getValueType() == MVT::f32 &&
@@ -4695,7 +4750,6 @@ static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
   return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
 }
 
-
 /// ExpandPowI - Expand a llvm.powi intrinsic.
 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
                           SelectionDAG &DAG) {
@@ -4712,8 +4766,8 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
     if (Val == 0)
       return DAG.getConstantFP(1.0, DL, LHS.getValueType());
 
-    const Function *F = DAG.getMachineFunction().getFunction();
-    if (!F->optForSize() ||
+    const Function &F = DAG.getMachineFunction().getFunction();
+    if (!F.optForSize() ||
         // If optimizing for size, don't insert too many multiplies.
         // This inserts up to 5 multiplies.
         countPopulation(Val) + Log2_32(Val) < 7) {
@@ -4766,12 +4820,12 @@ static unsigned getUnderlyingArgReg(const SDValue &N) {
   }
 }
 
-/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
-/// argument, create the corresponding DBG_VALUE machine instruction for it now.
-/// At the end of instruction selection, they will be inserted to the entry BB.
+/// If the DbgValueInst is a dbg_value of a function argument, create the
+/// corresponding DBG_VALUE machine instruction for it now. At the end of
+/// instruction selection, they will be inserted to the entry BB.
 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
     const Value *V, DILocalVariable *Variable, DIExpression *Expr,
-    DILocation *DL, int64_t Offset, bool IsDbgDeclare, const SDValue &N) {
+    DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
   const Argument *Arg = dyn_cast<Argument>(V);
   if (!Arg)
     return false;
@@ -4779,17 +4833,11 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   MachineFunction &MF = DAG.getMachineFunction();
   const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
 
-  // Ignore inlined function arguments here.
-  //
-  // FIXME: Should we be checking DL->inlinedAt() to determine this?
-  if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
-    return false;
-
   bool IsIndirect = false;
   Optional<MachineOperand> Op;
   // Some arguments' frame index is recorded during argument lowering.
   int FI = FuncInfo.getArgumentFrameIndex(Arg);
-  if (FI != INT_MAX)
+  if (FI != std::numeric_limits<int>::max())
     Op = MachineOperand::CreateFI(FI);
 
   if (!Op && N.getNode()) {
@@ -4806,22 +4854,48 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
     }
   }
 
+  if (!Op && N.getNode())
+    // Check if frame index is available.
+    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+      if (FrameIndexSDNode *FINode =
+              dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
+        Op = MachineOperand::CreateFI(FINode->getIndex());
+
   if (!Op) {
     // Check if ValueMap has reg number.
     DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
     if (VMI != FuncInfo.ValueMap.end()) {
+      const auto &TLI = DAG.getTargetLoweringInfo();
+      RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
+                       V->getType(), isABIRegCopy(V));
+      unsigned NumRegs =
+          std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
+      if (NumRegs > 1) {
+        unsigned I = 0;
+        unsigned Offset = 0;
+        auto RegisterVT = RFV.RegVTs.begin();
+        for (auto RegCount : RFV.RegCount) {
+          unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
+          for (unsigned E = I + RegCount; I != E; ++I) {
+            // The vregs are guaranteed to be allocated in sequence.
+            Op = MachineOperand::CreateReg(VMI->second + I, false);
+            auto FragmentExpr = DIExpression::createFragmentExpression(
+                Expr, Offset, RegisterSize);
+            if (!FragmentExpr)
+              continue;
+            FuncInfo.ArgDbgValues.push_back(
+                BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
+                        Op->getReg(), Variable, *FragmentExpr));
+            Offset += RegisterSize;
+          }
+        }
+        return true;
+      }
       Op = MachineOperand::CreateReg(VMI->second, false);
       IsIndirect = IsDbgDeclare;
     }
   }
 
-  if (!Op && N.getNode())
-    // Check if frame index is available.
-    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
-      if (FrameIndexSDNode *FINode =
-              dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
-        Op = MachineOperand::CreateFI(FINode->getIndex());
-
   if (!Op)
     return false;
 
@@ -4830,12 +4904,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
   if (Op->isReg())
     FuncInfo.ArgDbgValues.push_back(
         BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
-                Op->getReg(), Offset, Variable, Expr));
+                Op->getReg(), Variable, Expr));
   else
     FuncInfo.ArgDbgValues.push_back(
         BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
             .add(*Op)
-            .addImm(Offset)
+            .addImm(0)
             .addMetadata(Variable)
             .addMetadata(Expr));
 
@@ -4845,18 +4919,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
 /// Return the appropriate SDDbgValue based on N.
 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
                                              DILocalVariable *Variable,
-                                             DIExpression *Expr, int64_t Offset,
+                                             DIExpression *Expr,
                                              const DebugLoc &dl,
                                              unsigned DbgSDNodeOrder) {
   if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
     // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
     // stack slot locations as such instead of as indirectly addressed
     // locations.
-    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), 0, dl,
+    return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
                                      DbgSDNodeOrder);
   }
-  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
-                         Offset, dl, DbgSDNodeOrder);
+  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
+                         DbgSDNodeOrder);
 }
 
 // VisualStudio defines setjmp as _setjmp
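For arguments split across several registers, the new EmitFuncArgumentDbgValue code emits one DBG_VALUE per register, tagging each with a DIExpression fragment at an increasing bit offset. A stripped-down sketch of the fragment bookkeeping; the emit helper, the virtual-register counter, and the register-size list are assumptions for illustration, not patch code:

    // Walk the registers covering one argument and describe each slice.
    unsigned BitOffset = 0;
    for (unsigned RegisterSize : RegSizesInBits) { // assumed: size per register
      // createFragmentExpression returns an Optional because the requested
      // slice may not fit inside an already-fragmented expression.
      if (auto FragExpr =
              DIExpression::createFragmentExpression(Expr, BitOffset, RegisterSize))
        emitDbgValueForReg(NextVReg++, *FragExpr); // assumed helper
      BitOffset += RegisterSize;
    }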
@@ -4971,8 +5045,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   }
   case Intrinsic::memcpy_element_unordered_atomic: {
-    const ElementUnorderedAtomicMemCpyInst &MI =
-        cast<ElementUnorderedAtomicMemCpyInst>(I);
+    const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
     SDValue Dst = getValue(MI.getRawDest());
     SDValue Src = getValue(MI.getRawSource());
     SDValue Length = getValue(MI.getLength());
@@ -5010,7 +5083,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   }
   case Intrinsic::memmove_element_unordered_atomic: {
-    auto &MI = cast<ElementUnorderedAtomicMemMoveInst>(I);
+    auto &MI = cast<AtomicMemMoveInst>(I);
     SDValue Dst = getValue(MI.getRawDest());
     SDValue Src = getValue(MI.getRawSource());
     SDValue Length = getValue(MI.getLength());
@@ -5048,7 +5121,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   }
   case Intrinsic::memset_element_unordered_atomic: {
-    auto &MI = cast<ElementUnorderedAtomicMemSetInst>(I);
+    auto &MI = cast<AtomicMemSetInst>(I);
     SDValue Dst = getValue(MI.getRawDest());
     SDValue Val = getValue(MI.getValue());
     SDValue Length = getValue(MI.getLength());
@@ -5086,30 +5159,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(CallResult.second);
     return nullptr;
   }
+  case Intrinsic::dbg_addr:
   case Intrinsic::dbg_declare: {
-    const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
+    const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I);
     DILocalVariable *Variable = DI.getVariable();
     DIExpression *Expression = DI.getExpression();
-    const Value *Address = DI.getAddress();
     assert(Variable && "Missing variable");
-    if (!Address) {
-      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
-      return nullptr;
-    }
 
     // Check if address has undef value.
-    if (isa<UndefValue>(Address) ||
+    const Value *Address = DI.getVariableLocation();
+    if (!Address || isa<UndefValue>(Address) ||
        (Address->use_empty() && !isa<Argument>(Address))) {
       DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       return nullptr;
     }
 
-    // Byval arguments with frame indices were already handled after argument
-    // lowering and before isel.
-    const auto *Arg =
-        dyn_cast<Argument>(Address->stripInBoundsConstantOffsets());
-    if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX)
+    bool isParameter = Variable->isParameter() || isa<Argument>(Address);
+
+    // Check if this variable can be described by a frame index, typically
+    // either as a static alloca or a byval parameter.
+    int FI = std::numeric_limits<int>::max();
+    if (const auto *AI =
+            dyn_cast<AllocaInst>(Address->stripInBoundsConstantOffsets())) {
+      if (AI->isStaticAlloca()) {
+        auto I = FuncInfo.StaticAllocaMap.find(AI);
+        if (I != FuncInfo.StaticAllocaMap.end())
+          FI = I->second;
+      }
+    } else if (const auto *Arg = dyn_cast<Argument>(
+                   Address->stripInBoundsConstantOffsets())) {
+      FI = FuncInfo.getArgumentFrameIndex(Arg);
+    }
+
+    // llvm.dbg.addr is control dependent and always generates indirect
+    // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in
+    // the MachineFunction variable table.
+    if (FI != std::numeric_limits<int>::max()) {
+      if (Intrinsic == Intrinsic::dbg_addr)
+        DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl,
+                                                  SDNodeOrder),
+                        getRoot().getNode(), isParameter);
       return nullptr;
+    }
 
     SDValue &N = NodeMap[Address];
     if (!N.getNode() && isa<Argument>(Address))
@@ -5120,26 +5211,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
       if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
        Address = BCI->getOperand(0);
       // Parameters are handled specially.
-      bool isParameter = Variable->isParameter() || isa<Argument>(Address);
       auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
       if (isParameter && FINode) {
         // Byval parameter. We have a frame index at this point.
         SDV = DAG.getFrameIndexDbgValue(Variable, Expression,
-                                        FINode->getIndex(), 0, dl, SDNodeOrder);
+                                        FINode->getIndex(), dl, SDNodeOrder);
       } else if (isa<Argument>(Address)) {
         // Address is an argument, so try to emit its dbg value using
         // virtual register info from the FuncInfo.ValueMap.
-        EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true, N);
+        EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
         return nullptr;
       } else {
         SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
-                              true, 0, dl, SDNodeOrder);
+                              true, dl, SDNodeOrder);
       }
       DAG.AddDbgValue(SDV, N.getNode(), isParameter);
     } else {
       // If Address is an argument then try to emit its dbg value using
       // virtual register info from the FuncInfo.ValueMap.
-      if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, true,
+      if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true,
                                     N)) {
         DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
       }
@@ -5152,15 +5242,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DILocalVariable *Variable = DI.getVariable();
     DIExpression *Expression = DI.getExpression();
-    uint64_t Offset = DI.getOffset();
     const Value *V = DI.getValue();
     if (!V)
       return nullptr;
 
     SDDbgValue *SDV;
     if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
-      SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl,
-                                    SDNodeOrder);
+      SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
       DAG.AddDbgValue(SDV, nullptr, false);
       return nullptr;
     }
@@ -5171,10 +5259,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     if (!N.getNode() && isa<Argument>(V))
       // Check unused arguments map.
       N = UnusedArgNodeMap[V];
     if (N.getNode()) {
-      if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, false,
-                                   N))
+      if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
         return nullptr;
-      SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder);
+      SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
       DAG.AddDbgValue(SDV, N.getNode(), false);
       return nullptr;
     }
@@ -5213,12 +5300,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::eh_unwind_init:
     DAG.getMachineFunction().setCallsUnwindInit(true);
     return nullptr;
-  case Intrinsic::eh_dwarf_cfa: {
+  case Intrinsic::eh_dwarf_cfa:
     setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
     return nullptr;
-  }
   case Intrinsic::eh_sjlj_callsite: {
     MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
     ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
@@ -5247,17 +5333,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(Op.getValue(1));
     return nullptr;
   }
-  case Intrinsic::eh_sjlj_longjmp: {
+  case Intrinsic::eh_sjlj_longjmp:
     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
                             getRoot(), getValue(I.getArgOperand(0))));
     return nullptr;
-  }
-  case Intrinsic::eh_sjlj_setup_dispatch: {
+  case Intrinsic::eh_sjlj_setup_dispatch:
     DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
                             getRoot()));
     return nullptr;
-  }
-
   case Intrinsic::masked_gather:
     visitMaskedGather(I);
     return nullptr;
@@ -5430,6 +5513,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::experimental_constrained_fmul:
   case Intrinsic::experimental_constrained_fdiv:
   case Intrinsic::experimental_constrained_frem:
+  case Intrinsic::experimental_constrained_fma:
   case Intrinsic::experimental_constrained_sqrt:
   case Intrinsic::experimental_constrained_pow:
   case Intrinsic::experimental_constrained_powi:
@@ -5534,11 +5618,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(Res.getValue(1));
     return nullptr;
   }
-  case Intrinsic::stackrestore: {
+  case Intrinsic::stackrestore:
     Res = getValue(I.getArgOperand(0));
     DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
     return nullptr;
-  }
   case Intrinsic::get_dynamic_area_offset: {
     SDValue Op = getRoot();
     EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
@@ -5557,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::stackguard: {
     EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
     MachineFunction &MF = DAG.getMachineFunction();
-    const Module &M = *MF.getFunction()->getParent();
+    const Module &M = *MF.getFunction().getParent();
     SDValue Chain = getRoot();
     if (TLI.useLoadStackGuardNode()) {
       Res = getLoadStackGuard(DAG, sdl, Chain);
@@ -5568,6 +5651,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                         MachinePointerInfo(Global, 0), Align,
                         MachineMemOperand::MOVolatile);
     }
+    if (TLI.useStackGuardXorFP())
+      Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
     DAG.setRoot(Chain);
     setValue(&I, Res);
     return nullptr;
@@ -5624,9 +5709,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     return nullptr;
   case Intrinsic::assume:
   case Intrinsic::var_annotation:
-    // Discard annotate attributes and assumptions
+  case Intrinsic::sideeffect:
+    // Discard annotate attributes, assumptions, and artificial side-effects.
     return nullptr;
+  case Intrinsic::codeview_annotation: {
+    // Emit a label associated with this metadata.
+    MachineFunction &MF = DAG.getMachineFunction();
+    MCSymbol *Label =
+        MF.getMMI().getContext().createTempSymbol("annotation", true);
+    Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(0))->getMetadata();
+    MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
+    Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
+    DAG.setRoot(Res);
+    return nullptr;
+  }
+
   case Intrinsic::init_trampoline: {
     const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
@@ -5643,17 +5741,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     DAG.setRoot(Res);
     return nullptr;
   }
-  case Intrinsic::adjust_trampoline: {
+  case Intrinsic::adjust_trampoline:
     setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
                              TLI.getPointerTy(DAG.getDataLayout()),
                              getValue(I.getArgOperand(0))));
     return nullptr;
-  }
   case Intrinsic::gcroot: {
-    MachineFunction &MF = DAG.getMachineFunction();
-    const Function *F = MF.getFunction();
-    (void)F;
-    assert(F->hasGC() &&
+    assert(DAG.getMachineFunction().getFunction().hasGC() &&
            "only valid in functions with gc specified, enforced by Verifier");
     assert(GFI && "implied by previous");
     const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
@@ -5670,11 +5764,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
     return nullptr;
-  case Intrinsic::expect: {
+  case Intrinsic::expect:
     // Just replace __builtin_expect(exp, c) with EXP.
     setValue(&I, getValue(I.getArgOperand(0)));
     return nullptr;
-  }
 
   case Intrinsic::debugtrap:
   case Intrinsic::trap: {
@@ -5728,6 +5821,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::prefetch: {
     SDValue Ops[5];
     unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+    auto Flags = rw == 0 ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore;
     Ops[0] = getRoot();
     Ops[1] = getValue(I.getArgOperand(0));
     Ops[2] = getValue(I.getArgOperand(1));
@@ -5738,9 +5832,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
                             EVT::getIntegerVT(*Context, 8),
                             MachinePointerInfo(I.getArgOperand(0)),
                             0, /* align */
-                            false, /* volatile */
-                            rw==0, /* read */
-                            rw==1)); /* write */
+                            Flags));
     return nullptr;
   }
   case Intrinsic::lifetime_start:
@@ -5792,27 +5884,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::donothing:
     // ignore
     return nullptr;
-  case Intrinsic::experimental_stackmap: {
+  case Intrinsic::experimental_stackmap:
     visitStackmap(I);
     return nullptr;
-  }
   case Intrinsic::experimental_patchpoint_void:
-  case Intrinsic::experimental_patchpoint_i64: {
+  case Intrinsic::experimental_patchpoint_i64:
     visitPatchpoint(&I);
     return nullptr;
-  }
-  case Intrinsic::experimental_gc_statepoint: {
+  case Intrinsic::experimental_gc_statepoint:
     LowerStatepoint(ImmutableStatepoint(&I));
     return nullptr;
-  }
-  case Intrinsic::experimental_gc_result: {
+  case Intrinsic::experimental_gc_result:
     visitGCResult(cast<GCResultInst>(I));
     return nullptr;
-  }
-  case Intrinsic::experimental_gc_relocate: {
+  case Intrinsic::experimental_gc_relocate:
     visitGCRelocate(cast<GCRelocateInst>(I));
     return nullptr;
-  }
   case Intrinsic::instrprof_increment:
     llvm_unreachable("instrprof failed to lower an increment");
   case Intrinsic::instrprof_value_profile:
@@ -5851,7 +5938,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     // Get the symbol that defines the frame offset.
     auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts());
     auto *Idx = cast<ConstantInt>(I.getArgOperand(2));
-    unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX));
+    unsigned IdxVal =
+        unsigned(Idx->getLimitedValue(std::numeric_limits<int>::max()));
     MCSymbol *FrameAllocSym =
         MF.getMMI().getContext().getOrCreateFrameAllocSymbol(
            GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal);
@@ -5932,12 +6020,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::experimental_vector_reduce_umax:
   case Intrinsic::experimental_vector_reduce_umin:
   case Intrinsic::experimental_vector_reduce_fmax:
-  case Intrinsic::experimental_vector_reduce_fmin: {
+  case Intrinsic::experimental_vector_reduce_fmin:
     visitVectorReduce(I, Intrinsic);
     return nullptr;
   }
-
-  }
 }
 
 void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
@@ -5961,6 +6047,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   case Intrinsic::experimental_constrained_frem:
     Opcode = ISD::STRICT_FREM;
     break;
+  case Intrinsic::experimental_constrained_fma:
+    Opcode = ISD::STRICT_FMA;
+    break;
   case Intrinsic::experimental_constrained_sqrt:
     Opcode = ISD::STRICT_FSQRT;
     break;
@@ -6007,10 +6096,15 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   SDVTList VTs = DAG.getVTList(ValueVTs);
   SDValue Result;
   if (FPI.isUnaryOp())
-    Result = DAG.getNode(Opcode, sdl, VTs, 
+    Result = DAG.getNode(Opcode, sdl, VTs,
                          { Chain, getValue(FPI.getArgOperand(0)) });
+  else if (FPI.isTernaryOp())
+    Result = DAG.getNode(Opcode, sdl, VTs,
+                         { Chain, getValue(FPI.getArgOperand(0)),
+                           getValue(FPI.getArgOperand(1)),
+                           getValue(FPI.getArgOperand(2)) });
   else
-    Result = DAG.getNode(Opcode, sdl, VTs, 
+    Result = DAG.getNode(Opcode, sdl, VTs,
                          { Chain, getValue(FPI.getArgOperand(0)),
                            getValue(FPI.getArgOperand(1)) });
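visitConstrainedFPIntrinsic maps each experimental.constrained.* intrinsic onto the matching STRICT_ opcode and threads the chain through the node, so the operation cannot be reordered across side effects; the fma entry is what this patch adds. A trimmed sketch of the mapping, showing only a few of the cases that appear above:

    static unsigned getStrictOpcodeFor(unsigned IntrinsicID) {
      switch (IntrinsicID) {
      case Intrinsic::experimental_constrained_fadd: return ISD::STRICT_FADD;
      case Intrinsic::experimental_constrained_fma:  return ISD::STRICT_FMA; // new
      case Intrinsic::experimental_constrained_sqrt: return ISD::STRICT_FSQRT;
      default: llvm_unreachable("not a constrained FP intrinsic");
      }
    }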
@@ -6081,7 +6175,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
     if (MF.hasEHFunclets()) {
       assert(CLI.CS);
       WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
-      EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
+      EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()),
                                 BeginLabel, EndLabel);
     } else {
       MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
@@ -6189,7 +6283,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
 
 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
                              SelectionDAGBuilder &Builder) {
-
   // Check to see if this load can be trivially constant folded, e.g. if the
   // input is from a string literal.
   if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
@@ -6553,10 +6646,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
 
   // Check for well-known libc/libm calls.  If the function is internal, it
   // can't be a library call.  Don't do the check if marked as nobuiltin for
-  // some reason.
+  // some reason or the call site requires strict floating point semantics.
   LibFunc Func;
-  if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() &&
-      LibInfo->getLibFunc(*F, Func) &&
+  if (!I.isNoBuiltin() && !I.isStrictFP() && !F->hasLocalLinkage() &&
+      F->hasName() && LibInfo->getLibFunc(*F, Func) &&
      LibInfo->hasOptimizedCodeGen(Func)) {
     switch (Func) {
     default: break;
@@ -6735,7 +6828,7 @@ public:
   RegsForValue AssignedRegs;
 
   explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
-    : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
+    : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr, 0) {
   }
 
   /// Whether or not this operand accesses memory
@@ -6767,7 +6860,7 @@ public:
     // If this is an indirect operand, the operand is a pointer to the
     // accessed type.
     if (isIndirect) {
-      llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+      PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
       if (!PtrTy)
         report_fatal_error("Indirect operand for inline asm not a pointer!");
       OpTy = PtrTy->getElementType();
@@ -6799,7 +6892,7 @@ public:
   }
 };
 
-typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>;
 
 } // end anonymous namespace
 
@@ -6879,7 +6972,6 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
 /// allocation.  This produces generally horrible, but correct, code.
 ///
 ///   OpInfo describes the operand.
-///
 static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
                                  const SDLoc &DL,
                                  SDISelAsmOperandInfo &OpInfo) {
@@ -7013,6 +7105,8 @@ static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
   return true;
 }
 
+namespace {
+
 class ExtraFlags {
   unsigned Flags = 0;
 
@@ -7028,7 +7122,7 @@ public:
       Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
   }
 
-  void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) {
+  void update(const TargetLowering::AsmOperandInfo &OpInfo) {
     // Ideally, we would only check against memory constraints.  However, the
     // meaning of an Other constraint can be target-specific and we can't easily
     // reason about it.  Therefore, be conservative and set MayLoad/MayStore
@@ -7047,8 +7141,9 @@ public:
   unsigned get() const { return Flags; }
 };
 
+} // end anonymous namespace
+
 /// visitInlineAsm - Handle a call to an InlineAsm object.
-///
 void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
 
@@ -7207,13 +7302,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
   RegsForValue RetValRegs;
 
   // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
-  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
+  std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
 
   for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
     SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
 
     switch (OpInfo.Type) {
-    case InlineAsm::isOutput: {
+    case InlineAsm::isOutput:
       if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
           OpInfo.ConstraintType != TargetLowering::C_Register) {
         // Memory output, or 'other' output (e.g. 'X' constraint).
@@ -7264,7 +7359,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                      : InlineAsm::Kind_RegDef,
                                false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
       break;
-    }
+
     case InlineAsm::isInput: {
       SDValue InOperandVal = OpInfo.CallOperand;
 
@@ -7397,7 +7492,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                                dl, DAG, AsmNodeOperands);
       break;
     }
-    case InlineAsm::isClobber: {
+    case InlineAsm::isClobber:
      // Add the clobbered value to the operand list, so that the register
      // allocator is aware that the physreg got clobbered.
      if (!OpInfo.AssignedRegs.Regs.empty())
@@ -7406,7 +7501,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
                                                  AsmNodeOperands);
       break;
     }
-    }
   }
 
   // Finish up input operands.  Set the input chain and add the flag last.
@@ -7453,7 +7547,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
     return;
   }
 
-  std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
+  std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
 
   // Process indirect outputs, first output all of the flagged copies out of
   // physregs.
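The ExtraFlags helper that the patch wraps in an anonymous namespace folds the call-site-wide inline-asm properties into one bitmask, treating memory-like constraints conservatively. A rough sketch of the accumulation, simplified from the constructor and update() logic shown above:

    unsigned Flags = 0;
    if (IA->hasSideEffects())
      Flags |= InlineAsm::Extra_HasSideEffects;
    if (IA->isAlignStack())
      Flags |= InlineAsm::Extra_IsAlignStack;
    Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
    // A memory (or hard-to-classify "Other") constraint conservatively
    // implies both load and store behavior.
    if (OpInfo.ConstraintType == TargetLowering::C_Memory)
      Flags |= InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore;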
@@ -7865,13 +7959,13 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
 
   switch (Intrinsic) {
   case Intrinsic::experimental_vector_reduce_fadd:
-    if (FMF.unsafeAlgebra())
+    if (FMF.isFast())
       Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
     break;
   case Intrinsic::experimental_vector_reduce_fmul:
-    if (FMF.unsafeAlgebra())
+    if (FMF.isFast())
       Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
     else
       Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
@@ -7903,14 +7997,12 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
   case Intrinsic::experimental_vector_reduce_umin:
     Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1);
     break;
-  case Intrinsic::experimental_vector_reduce_fmax: {
+  case Intrinsic::experimental_vector_reduce_fmax:
     Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags);
     break;
-  }
-  case Intrinsic::experimental_vector_reduce_fmin: {
+  case Intrinsic::experimental_vector_reduce_fmin:
     Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags);
     break;
-  }
   default:
     llvm_unreachable("Unhandled vector reduce intrinsic");
   }
@@ -7955,10 +8047,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       uint64_t Offset = OldOffsets[i];
       MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
       unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
-      unsigned RegisterVTSize = RegisterVT.getSizeInBits();
+      unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
       RetTys.append(NumRegs, RegisterVT);
       for (unsigned j = 0; j != NumRegs; ++j)
-        Offsets.push_back(Offset + j * RegisterVTSize);
+        Offsets.push_back(Offset + j * RegisterVTByteSZ);
     }
   }
 
@@ -7996,6 +8088,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     Entry.IsSwiftError = false;
     Entry.Alignment = Align;
     CLI.getArgs().insert(CLI.getArgs().begin(), Entry);
+    CLI.NumFixedArgs += 1;
     CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext());
 
     // sret demotion isn't compatible with tail-calls, since the sret argument
@@ -8148,8 +8241,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
       }
 
       getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT,
-                     CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind,
-                     true);
+                     CLI.CS.getInstruction(), ExtendKind, true);
 
       for (unsigned j = 0; j != NumParts; ++j) {
         // if it isn't first piece, alignment must be 1
@@ -8209,7 +8301,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
     // The instruction result is the result of loading from the
     // hidden sret parameter.
     SmallVector<EVT, 1> PVTs;
-    Type *PtrRetTy = PointerType::getUnqual(OrigRetTy);
+    Type *PtrRetTy = OrigRetTy->getPointerTo(DL.getAllocaAddrSpace());
 
     ComputeValueVTs(*this, DL, PtrRetTy, PVTs);
     assert(PVTs.size() == 1 && "Pointers should fit in one register");
@@ -8326,9 +8418,9 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
   return true;
 }
 
-typedef DenseMap<const Argument *,
-                 std::pair<const AllocaInst *, const StoreInst *>>
-    ArgCopyElisionMapTy;
+using ArgCopyElisionMapTy =
+    DenseMap<const Argument *,
+             std::pair<const AllocaInst *, const StoreInst *>>;
 
 /// Scan the entry block of the function in FuncInfo for arguments that look
 /// like copies into a local alloca. Record any copied arguments in
@@ -8503,7 +8595,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     // Put in an sret pointer parameter before all the other parameters.
     SmallVector<EVT, 1> ValueVTs;
     ComputeValueVTs(*TLI, DAG.getDataLayout(),
-                    PointerType::getUnqual(F.getReturnType()), ValueVTs);
+                    F.getReturnType()->getPointerTo(
+                        DAG.getDataLayout().getAllocaAddrSpace()),
+                    ValueVTs);
 
     // NOTE: Assuming that a pointer will never break down to more than one VT
     // or one register.
@@ -8657,7 +8751,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     // from the sret argument into it.
     SmallVector<EVT, 1> ValueVTs;
     ComputeValueVTs(*TLI, DAG.getDataLayout(),
-                    PointerType::getUnqual(F.getReturnType()), ValueVTs);
+                    F.getReturnType()->getPointerTo(
+                        DAG.getDataLayout().getAllocaAddrSpace()),
+                    ValueVTs);
     MVT VT = ValueVTs[0].getSimpleVT();
     MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
     Optional<ISD::NodeType> AssertOp = None;
@@ -8749,11 +8845,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
     SDB->setValue(&Arg, Res);
     if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
+      // We want to associate the argument with the frame index, among
+      // involved operands, that corresponds to the lowest address. The
+      // getCopyFromParts function, called earlier, swaps the order of the
+      // operands to BUILD_PAIR depending on endianness. The result of that
+      // swapping is that the least significant bits of the argument will
+      // be in the first operand of the BUILD_PAIR node, and the most
+      // significant bits will be in the second operand.
+      unsigned LowAddressOp = DAG.getDataLayout().isBigEndian() ? 1 : 0;
       if (LoadSDNode *LNode =
-          dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
+          dyn_cast<LoadSDNode>(Res.getOperand(LowAddressOp).getNode()))
         if (FrameIndexSDNode *FI =
             dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
-        FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
+          FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
     }
 
     // Update the SwiftErrorVRegDefMap.
@@ -8813,7 +8917,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
 /// directly add them, because expansion might result in multiple MBB's for one
 /// BB. As such, the start of the BB might correspond to a different MBB than
 /// the end.
-///
 void
 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
   const TerminatorInst *TI = LLVMBB->getTerminator();
@@ -9249,10 +9352,12 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
   BitTestInfo BTI;
   std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) {
-    // Sort by probability first, number of bits second.
+    // Sort by probability first, number of bits second, bit mask third.
     if (a.ExtraProb != b.ExtraProb)
       return a.ExtraProb > b.ExtraProb;
-    return a.Bits > b.Bits;
+    if (a.Bits != b.Bits)
+      return a.Bits > b.Bits;
+    return a.Mask < b.Mask;
   });
 
   for (auto &CB : CBV) {
@@ -9441,10 +9546,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
   }
 
   if (TM.getOptLevel() != CodeGenOpt::None) {
-    // Order cases by probability so the most likely case will be checked first.
+    // Here, we order cases by probability so the most likely case will be
+    // checked first. However, two clusters can have the same probability in
+    // which case their relative ordering is non-deterministic. So we use Low
+    // as a tie-breaker as clusters are guaranteed to never overlap.
    std::sort(W.FirstCluster, W.LastCluster + 1,
              [](const CaseCluster &a, const CaseCluster &b) {
-      return a.Prob > b.Prob;
+      return a.Prob != b.Prob ?
+             a.Prob > b.Prob :
+             a.Low->getValue().slt(b.Low->getValue());
     });
 
     // Rearrange the case blocks so that the last one falls through if possible
@@ -9570,8 +9680,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
       }
 
       // The false probability is the sum of all unhandled cases.
-      CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob,
-                   UnhandledProbs);
+      CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
+                   getCurSDLoc(), I->Prob, UnhandledProbs);
 
       if (CurMBB == SwitchMBB)
         visitSwitchCase(CB, SwitchMBB);
@@ -9627,7 +9737,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
     I++;
   }
 
-  for (;;) {
+  while (true) {
     // Our binary search tree differs from a typical BST in that ours can have up
     // to three values in each leaf. The pivot selection above doesn't take that
     // into account, which means the tree might require more nodes and be less
@@ -9722,7 +9832,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
 
   // Create the CaseBlock record that will be used to lower the branch.
   CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
-               LeftProb, RightProb);
+               getCurSDLoc(), LeftProb, RightProb);
 
   if (W.MBB == SwitchMBB)
     visitSwitchCase(CB, SwitchMBB);
@@ -9730,6 +9840,76 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
     SwitchCases.push_back(CB);
 }
 
+// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
+// from the switch statement.
+static BranchProbability scaleCaseProbability(BranchProbability CaseProb,
+                                              BranchProbability PeeledCaseProb) {
+  if (PeeledCaseProb == BranchProbability::getOne())
+    return BranchProbability::getZero();
+  BranchProbability SwitchProb = PeeledCaseProb.getCompl();
+
+  uint32_t Numerator = CaseProb.getNumerator();
+  uint32_t Denominator = SwitchProb.scale(CaseProb.getDenominator());
+  return BranchProbability(Numerator, std::max(Numerator, Denominator));
+}
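Conceptually, scaleCaseProbability renormalizes a remaining case by the edge mass left once the peeled case is removed: Scaled = CaseProb / (1 - PeeledCaseProb), clamped to at most one by the std::max in the denominator. A worked example under that reading, with illustrative values:

    // Peel a case that takes 3/5 of the switch's edge mass. A remaining
    // case that originally took 1/5 now takes (1/5) / (2/5) = 1/2 of the
    // peeled switch.
    BranchProbability PeeledCaseProb(3, 5);
    BranchProbability Remaining(1, 5);
    BranchProbability Scaled = scaleCaseProbability(Remaining, PeeledCaseProb);
    // Scaled compares equal to BranchProbability(1, 2), up to rounding.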
+
+// Try to peel the top probability case if it exceeds the threshold.
+// Return current MachineBasicBlock for the switch statement if the peeling
+// does not occur.
+// If the peeling is performed, return the newly created MachineBasicBlock
+// for the peeled switch statement. Also update Clusters to remove the peeled
+// case. PeeledCaseProb is the BranchProbability for the peeled case.
+MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
+    const SwitchInst &SI, CaseClusterVector &Clusters,
+    BranchProbability &PeeledCaseProb) {
+  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
+  // Don't perform if there is only one cluster or optimizing for size.
+  if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
+      TM.getOptLevel() == CodeGenOpt::None ||
+      SwitchMBB->getParent()->getFunction().optForMinSize())
+    return SwitchMBB;
+
+  BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
+  unsigned PeeledCaseIndex = 0;
+  bool SwitchPeeled = false;
+  for (unsigned Index = 0; Index < Clusters.size(); ++Index) {
+    CaseCluster &CC = Clusters[Index];
+    if (CC.Prob < TopCaseProb)
+      continue;
+    TopCaseProb = CC.Prob;
+    PeeledCaseIndex = Index;
+    SwitchPeeled = true;
+  }
+  if (!SwitchPeeled)
+    return SwitchMBB;
+
+  DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb
+               << "\n");
+
+  // Record the MBB for the peeled switch statement.
+  MachineFunction::iterator BBI(SwitchMBB);
+  ++BBI;
+  MachineBasicBlock *PeeledSwitchMBB =
+      FuncInfo.MF->CreateMachineBasicBlock(SwitchMBB->getBasicBlock());
+  FuncInfo.MF->insert(BBI, PeeledSwitchMBB);
+
+  ExportFromCurrentBlock(SI.getCondition());
+  auto PeeledCaseIt = Clusters.begin() + PeeledCaseIndex;
+  SwitchWorkListItem W = {SwitchMBB, PeeledCaseIt, PeeledCaseIt,
+                          nullptr, nullptr, TopCaseProb.getCompl()};
+  lowerWorkItem(W, SI.getCondition(), SwitchMBB, PeeledSwitchMBB);
+
+  Clusters.erase(PeeledCaseIt);
+  for (CaseCluster &CC : Clusters) {
+    DEBUG(dbgs() << "Scale the probability for one cluster, before scaling: "
+                 << CC.Prob << "\n");
+    CC.Prob = scaleCaseProbability(CC.Prob, TopCaseProb);
+    DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n");
+  }
+  PeeledCaseProb = TopCaseProb;
+  return PeeledSwitchMBB;
+}
+
 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   // Extract cases from the switch.
   BranchProbabilityInfo *BPI = FuncInfo.BPI;
@@ -9783,9 +9963,15 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     }
   }
 
+  // The branch probability of the peeled case.
+  BranchProbability PeeledCaseProb = BranchProbability::getZero();
+  MachineBasicBlock *PeeledSwitchMBB =
+      peelDominantCaseCluster(SI, Clusters, PeeledCaseProb);
+
   // If there is only the default destination, jump there directly.
   MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
   if (Clusters.empty()) {
+    assert(PeeledSwitchMBB == SwitchMBB);
     SwitchMBB->addSuccessor(DefaultMBB);
     if (DefaultMBB != NextBlock(SwitchMBB)) {
       DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
@@ -9817,8 +10003,14 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
   SwitchWorkList WorkList;
   CaseClusterIt First = Clusters.begin();
   CaseClusterIt Last = Clusters.end() - 1;
-  auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
-  WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+  auto DefaultProb = getEdgeProbability(PeeledSwitchMBB, DefaultMBB);
+  // Scale the branch probability for DefaultMBB if the peel occurs and
+  // DefaultMBB is not replaced.
+  if (PeeledCaseProb != BranchProbability::getZero() &&
+      DefaultMBB == FuncInfo.MBBMap[SI.getDefaultDest()])
+    DefaultProb = scaleCaseProbability(DefaultProb, PeeledCaseProb);
+  WorkList.push_back(
+      {PeeledSwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
 
   while (!WorkList.empty()) {
     SwitchWorkListItem W = WorkList.back();
@@ -9826,7 +10018,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
     unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
 
     if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
-        !DefaultMBB->getParent()->getFunction()->optForMinSize()) {
+        !DefaultMBB->getParent()->getFunction().optForMinSize()) {
       // For optimized builds, lower large range as a balanced binary tree.
       splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
       continue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index ac1d6aae65a52..9e7c2bc6821bf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,4 +1,4 @@
-//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===//
+//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -16,67 +16,75 @@
 
 #include "StatepointLowering.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/IR/CallSite.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
 #include "llvm/IR/Statepoint.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/CodeGen.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Target/TargetLowering.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
 #include <utility>
 #include <vector>
 
 namespace llvm {
 
-class AddrSpaceCastInst;
 class AllocaInst;
+class AtomicCmpXchgInst;
+class AtomicRMWInst;
 class BasicBlock;
-class BitCastInst;
 class BranchInst;
 class CallInst;
+class CatchPadInst;
+class CatchReturnInst;
+class CatchSwitchInst;
+class CleanupPadInst;
+class CleanupReturnInst;
+class Constant;
+class ConstantInt;
+class ConstrainedFPIntrinsic;
 class DbgValueInst;
-class ExtractElementInst;
-class FCmpInst;
-class FPExtInst;
-class FPToSIInst;
-class FPToUIInst;
-class FPTruncInst;
-class Function;
+class DataLayout;
+class DIExpression;
+class DILocalVariable;
+class DILocation;
+class FenceInst;
 class FunctionLoweringInfo;
-class GetElementPtrInst;
 class GCFunctionInfo;
-class ICmpInst;
-class IntToPtrInst;
+class GCRelocateInst;
+class GCResultInst;
 class IndirectBrInst;
 class InvokeInst;
-class InsertElementInst;
-class Instruction;
+class LandingPadInst;
+class LLVMContext;
 class LoadInst;
 class MachineBasicBlock;
-class MachineInstr;
-class MachineRegisterInfo;
-class MDNode;
-class MVT;
 class PHINode;
-class PtrToIntInst;
+class ResumeInst;
 class ReturnInst;
 class SDDbgValue;
-class SExtInst;
-class SelectInst;
-class ShuffleVectorInst;
-class SIToFPInst;
 class StoreInst;
 class SwitchInst;
-class DataLayout;
 class TargetLibraryInfo;
-class TargetLowering;
-class TruncInst;
-class UIToFPInst;
-class UnreachableInst;
+class TargetMachine;
+class Type;
 class VAArgInst;
-class ZExtInst;
+class UnreachableInst;
+class Use;
+class User;
+class Value;
 
 //===----------------------------------------------------------------------===//
 /// SelectionDAGBuilder - This is the common target-independent lowering
@@ -84,7 +92,7 @@ class ZExtInst;
 ///
 class SelectionDAGBuilder {
   /// CurInst - The current instruction being visited
-  const Instruction *CurInst;
+  const Instruction *CurInst = nullptr;
 
   DenseMap<const Value*, SDValue> NodeMap;
 
@@ -94,13 +102,15 @@ class SelectionDAGBuilder {
 
   /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
   class DanglingDebugInfo {
-    const DbgValueInst* DI;
+    const DbgValueInst* DI = nullptr;
     DebugLoc dl;
-    unsigned SDNodeOrder;
+    unsigned SDNodeOrder = 0;
+
   public:
-    DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { }
+    DanglingDebugInfo() = default;
     DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO)
         : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {}
+
     const DbgValueInst* getDI() { return DI; }
     DebugLoc getdl() { return dl; }
     unsigned getSDNodeOrder() { return SDNodeOrder; }
@@ -120,8 +130,8 @@ public:
   /// State used while lowering a statepoint sequence (gc_statepoint,
   /// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details.
   StatepointLoweringState StatepointLowering;
-private:
 
+private:
   /// PendingExports - CopyToReg nodes that copy values to virtual registers
   /// for export to other blocks need to be emitted before any terminator
   /// instruction, but they have no other ordering requirements. We bunch them
@@ -189,23 +199,22 @@ private:
     }
   };
 
-  typedef std::vector<CaseCluster> CaseClusterVector;
-  typedef CaseClusterVector::iterator CaseClusterIt;
+  using CaseClusterVector = std::vector<CaseCluster>;
+  using CaseClusterIt = CaseClusterVector::iterator;
 
   struct CaseBits {
-    uint64_t Mask;
-    MachineBasicBlock* BB;
-    unsigned Bits;
+    uint64_t Mask = 0;
+    MachineBasicBlock* BB = nullptr;
+    unsigned Bits = 0;
     BranchProbability ExtraProb;
 
+    CaseBits() = default;
     CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
              BranchProbability Prob):
-      Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { }
-
-    CaseBits() : Mask(0), BB(nullptr), Bits(0) {}
+      Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
   };
 
-  typedef std::vector<CaseBits> CaseBitsVector;
+  using CaseBitsVector = std::vector<CaseBits>;
 
   /// Sort Clusters and merge adjacent cases.
   void sortAndRangeify(CaseClusterVector &Clusters);
@@ -214,15 +223,6 @@ private:
   /// SelectionDAGBuilder and SDISel for the code generation of additional basic
   /// blocks needed by multi-case switch statements.
   struct CaseBlock {
-    CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
-              const Value *cmpmiddle, MachineBasicBlock *truebb,
-              MachineBasicBlock *falsebb, MachineBasicBlock *me,
-              BranchProbability trueprob = BranchProbability::getUnknown(),
-              BranchProbability falseprob = BranchProbability::getUnknown())
-        : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
-          TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
-          FalseProb(falseprob) {}
-
     // CC - the condition code to use for the case block's setcc node
     ISD::CondCode CC;
 
@@ -237,14 +237,25 @@ private:
     // ThisBB - the block into which to emit the code for the setcc and branches
     MachineBasicBlock *ThisBB;
 
+    /// The debug location of the instruction this CaseBlock was
+    /// produced from.
+    SDLoc DL;
+
     // TrueProb/FalseProb - branch weights.
     BranchProbability TrueProb, FalseProb;
+
+    CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+              const Value *cmpmiddle, MachineBasicBlock *truebb,
+              MachineBasicBlock *falsebb, MachineBasicBlock *me,
+              SDLoc dl,
+              BranchProbability trueprob = BranchProbability::getUnknown(),
+              BranchProbability falseprob = BranchProbability::getUnknown())
+        : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+          TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
+          TrueProb(trueprob), FalseProb(falseprob) {}
   };
 
   struct JumpTable {
-    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
-              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
-
     /// Reg - the virtual register containing the index of the jump table entry
     /// to jump to.
     unsigned Reg;
     /// JTI - the JumpTableIndex for this jump table in the function.
     unsigned JTI;
     /// MBB - the MBB into which to emit the code for the indirect jump.
@@ -255,39 +266,38 @@ private:
     /// Default - the MBB of the default bb, which is a successor of the range
     /// check MBB.  This is used when updating PHI nodes in successors.
     MachineBasicBlock *Default;
+
+    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
   };
 
   struct JumpTableHeader {
-    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
-                    bool E = false)
-        : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
-          Emitted(E) {}
     APInt First;
     APInt Last;
     const Value *SValue;
     MachineBasicBlock *HeaderBB;
     bool Emitted;
+
+    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+                    bool E = false)
+        : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
+          Emitted(E) {}
   };
-  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+  using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
 
   struct BitTestCase {
-    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
-                BranchProbability Prob):
-      Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
     uint64_t Mask;
     MachineBasicBlock *ThisBB;
     MachineBasicBlock *TargetBB;
     BranchProbability ExtraProb;
+
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+                BranchProbability Prob):
+      Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
   };
 
-  typedef SmallVector<BitTestCase, 3> BitTestInfo;
+  using BitTestInfo = SmallVector<BitTestCase, 3>;
 
   struct BitTestBlock {
-    BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
-                 bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
-                 BitTestInfo C, BranchProbability Pr)
-        : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
-          RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
-          Cases(std::move(C)), Prob(Pr) {}
     APInt First;
     APInt Range;
     const Value *SValue;
@@ -300,6 +310,13 @@ private:
     BitTestInfo Cases;
     BranchProbability Prob;
     BranchProbability DefaultProb;
+
+    BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+                 bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+                 BitTestInfo C, BranchProbability Pr)
+        : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
+          RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
+          Cases(std::move(C)), Prob(Pr) {}
   };
 
   /// Return the range of value in [First..Last].
@@ -336,7 +353,7 @@ private:
     const ConstantInt *LT;
     BranchProbability DefaultProb;
   };
-  typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList;
+  using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
 
   /// Determine the rank by weight of CC in [First,Last]. If CC has more weight
   /// than each cluster in the range, its rank is 0.
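These header hunks all apply the same modernization: data members are declared first, with in-class initializers where a hand-written default constructor existed, and constructors follow, with the trivial one replaced by = default. The shape of the pattern, modeled on the CaseBits struct above (parameter names shortened for brevity):

    struct Example {
      uint64_t Mask = 0;               // in-class initializers replace the
      MachineBasicBlock *BB = nullptr; // hand-written default constructor
      unsigned Bits = 0;
      BranchProbability ExtraProb;

      Example() = default;             // now trivially defaulted
      Example(uint64_t M, MachineBasicBlock *B, unsigned N, BranchProbability P)
          : Mask(M), BB(B), Bits(N), ExtraProb(P) {}
    };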
@@ -352,6 +369,10 @@ private:
                      MachineBasicBlock *SwitchMBB,
                      MachineBasicBlock *DefaultMBB);
 
+  /// Peel the top probability case if it exceeds the threshold
+  MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI,
+                                             CaseClusterVector &Clusters,
+                                             BranchProbability &PeeledCaseProb);
 
   /// A class which encapsulates all of the information needed to generate a
   /// stack protector check and signals to isel via its state being initialized
@@ -466,8 +487,7 @@ private:
   /// the same function, use the same failure basic block).
   class StackProtectorDescriptor {
   public:
-    StackProtectorDescriptor()
-        : ParentMBB(nullptr), SuccessMBB(nullptr), FailureMBB(nullptr) {}
+    StackProtectorDescriptor() = default;
 
     /// Returns true if all fields of the stack protector descriptor are
     /// initialized implying that we should/are ready to emit a stack protector.
@@ -533,15 +553,15 @@ private:
     /// replace it with a compare/branch to the successor mbbs
     /// SuccessMBB/FailureMBB depending on whether or not the stack protector
     /// was violated.
-    MachineBasicBlock *ParentMBB;
+    MachineBasicBlock *ParentMBB = nullptr;
 
     /// A basic block visited on stack protector check success that contains the
     /// terminators of ParentMBB.
-    MachineBasicBlock *SuccessMBB;
+    MachineBasicBlock *SuccessMBB = nullptr;
 
     /// This basic block visited on stack protector check failure that will
     /// contain a call to __stack_chk_fail().
-    MachineBasicBlock *FailureMBB;
+    MachineBasicBlock *FailureMBB = nullptr;
 
     /// Add a successor machine basic block to ParentMBB. If the successor mbb
     /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic
@@ -554,25 +574,29 @@ private:
 
 private:
   const TargetMachine &TM;
+
 public:
   /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling
   /// nodes without a corresponding SDNode.
   static const unsigned LowestSDNodeOrder = 1;
 
   SelectionDAG &DAG;
-  const DataLayout *DL;
-  AliasAnalysis *AA;
+  const DataLayout *DL = nullptr;
+  AliasAnalysis *AA = nullptr;
   const TargetLibraryInfo *LibInfo;
 
   /// SwitchCases - Vector of CaseBlock structures used to communicate
   /// SwitchInst code generation information.
   std::vector<CaseBlock> SwitchCases;
+
   /// JTCases - Vector of JumpTable structures used to communicate
   /// SwitchInst code generation information.
   std::vector<JumpTableBlock> JTCases;
+
   /// BitTestCases - Vector of BitTestBlock structures used to communicate
   /// SwitchInst code generation information.
   std::vector<BitTestBlock> BitTestCases;
+
   /// A StackProtectorDescriptor structure used to communicate stack protector
   /// information in between SelectBasicBlock and FinishBasicBlock.
   StackProtectorDescriptor SPDescriptor;
@@ -589,22 +613,19 @@ public:
   GCFunctionInfo *GFI;
 
   /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
-  DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+  DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
 
   /// HasTailCall - This is set to true if a call in the current
   /// block has been translated as a tail call. In this case,
   /// no subsequent DAG nodes should be created.
-  ///
-  bool HasTailCall;
+  bool HasTailCall = false;
 
   LLVMContext *Context;
 
   SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
                       CodeGenOpt::Level ol)
-    : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()),
-      DAG(dag), DL(nullptr), AA(nullptr), FuncInfo(funcinfo),
-      HasTailCall(false) {
-  }
+    : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+      FuncInfo(funcinfo) {}
 
   void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
             const TargetLibraryInfo *li);
@@ -653,6 +674,7 @@ public:
   // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
   // generate the debug data structures now that we've seen its definition.
   void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+
   SDValue getValue(const Value *V);
   bool findValue(const Value *V) const;
 
@@ -923,13 +945,12 @@ private:
 
   void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message);
 
-  /// EmitFuncArgumentDbgValue - If V is an function argument then create
-  /// corresponding DBG_VALUE machine instruction for it now. At the end of
-  /// instruction selection, they will be inserted to the entry BB.
+  /// If V is a function argument then create corresponding DBG_VALUE machine
+  /// instruction for it now. At the end of instruction selection, they will be
+  /// inserted to the entry BB.
   bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable,
                                 DIExpression *Expr, DILocation *DL,
-                                int64_t Offset, bool IsDbgDeclare,
-                                const SDValue &N);
+                                bool IsDbgDeclare, const SDValue &N);
 
   /// Return the next block after MBB, or nullptr if there is none.
   MachineBasicBlock *NextBlock(MachineBasicBlock *MBB);
@@ -940,8 +961,8 @@ private:
 
   /// Return the appropriate SDDbgValue based on N.
   SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
-                          DIExpression *Expr, int64_t Offset,
-                          const DebugLoc &dl, unsigned DbgSDNodeOrder);
+                          DIExpression *Expr, const DebugLoc &dl,
+                          unsigned DbgSDNodeOrder);
 };
 
 /// RegsForValue - This struct represents the registers (physical or virtual)
@@ -978,13 +999,11 @@ struct RegsForValue {
 
   /// Records if this value needs to be treated in an ABI dependant manner,
   /// different to normal type legalization.
-  bool IsABIMangled;
-
-  RegsForValue();
+  bool IsABIMangled = false;
 
+  RegsForValue() = default;
   RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt, EVT valuevt,
                bool IsABIMangledValue = false);
-
   RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                const DataLayout &DL, unsigned Reg, Type *Ty,
                bool IsABIMangledValue = false);
@@ -1024,4 +1043,4 @@ struct RegsForValue {
 
 } // end namespace llvm
 
-#endif
+#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 3dd58975b1f10..dd30dc16378c4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===//
+//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -11,24 +11,42 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ScheduleDAGSDNodes.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Printable.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetIntrinsicInfo.h"
 #include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cstdint>
+#include <iterator>
 
 using namespace llvm;
 
 static cl::opt<bool>
@@ -385,6 +403,7 @@ static Printable PrintNodeId(const SDNode &Node) {
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); }
+
 LLVM_DUMP_METHOD void SDNode::dump(const SelectionDAG *G) const {
   print(dbgs(), G);
   dbgs() << '\n';
@@ -402,6 +421,36 @@ void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const {
 }
 
 void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
+  if (getFlags().hasNoUnsignedWrap())
+    OS << " nuw";
+
+  if (getFlags().hasNoSignedWrap())
+    OS << " nsw";
+
+  if (getFlags().hasExact())
+    OS << " exact";
+
+  if (getFlags().hasUnsafeAlgebra())
+    OS << " unsafe";
+
+  if (getFlags().hasNoNaNs())
+    OS << " nnan";
+
+  if (getFlags().hasNoInfs())
+    OS << " ninf";
+
+  if (getFlags().hasNoSignedZeros())
+    OS << " nsz";
+
if (getFlags().hasAllowReciprocal()) + OS << " arcp"; + + if (getFlags().hasAllowContract()) + OS << " contract"; + + if (getFlags().hasVectorReduction()) + OS << " vector-reduction"; + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { if (!MN->memoperands_empty()) { OS << "<"; @@ -429,9 +478,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { OS << '<' << CSDN->getAPIntValue() << '>'; } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { - if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle()) + if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEsingle()) OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; - else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble()) + else if (&CSDN->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; else { OS << "<APFloat("; @@ -479,7 +528,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), + OS << ' ' << printReg(R->getReg(), G ? G->getSubtarget().getRegisterInfo() : nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { @@ -640,7 +689,8 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G, } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -typedef SmallPtrSet<const SDNode *, 32> VisitedSDNodeSet; +using VisitedSDNodeSet = SmallPtrSet<const SDNode *, 32>; + static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { if (!once.insert(N).second) // If we've been here before, return now. 
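The flag-printing block added to SDNode::print_details above emits one short
token per set SDNodeFlags bit. A stand-alone sketch of the same
accumulate-a-token-per-flag pattern (the FlagSet struct and names are invented
for illustration; the real flags live on SDNodeFlags):

#include <iostream>
#include <string>

// Hypothetical reduced flag set; not LLVM's SDNodeFlags.
struct FlagSet {
  bool NoUnsignedWrap = false, NoSignedWrap = false, Exact = false;
  bool NoNaNs = false, AllowReciprocal = false, AllowContract = false;
};

// Mirrors the style of the hunk: one short token per set flag.
static std::string formatFlags(const FlagSet &F) {
  std::string S;
  if (F.NoUnsignedWrap)  S += " nuw";
  if (F.NoSignedWrap)    S += " nsw";
  if (F.Exact)           S += " exact";
  if (F.NoNaNs)          S += " nnan";
  if (F.AllowReciprocal) S += " arcp";
  if (F.AllowContract)   S += " contract";
  return S;
}

int main() {
  FlagSet F; F.NoSignedWrap = true; F.NoNaNs = true;
  std::cout << "add" << formatFlags(F) << "\n"; // prints "add nsw nnan"
}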
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bdf57e8058426..18f6997ef83c3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAGISel.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/APInt.h" @@ -26,7 +27,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -45,9 +46,12 @@ #include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" @@ -80,13 +84,9 @@ #include "llvm/Support/KnownBits.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include <algorithm> #include <cassert> @@ -212,7 +212,7 @@ namespace llvm { IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); DEBUG(dbgs() << "\nChanging optimization level for Function " - << IS.MF->getFunction()->getName() << "\n"); + << IS.MF->getFunction().getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); SavedFastISel = IS.TM.Options.EnableFastISel; @@ -228,7 +228,7 @@ namespace llvm { if (IS.OptLevel == SavedOptLevel) return; DEBUG(dbgs() << "\nRestoring optimization level for Function " - << IS.MF->getFunction()->getName() << "\n"); + << IS.MF->getFunction().getName() << "\n"); DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" << SavedOptLevel << "\n"); IS.OptLevel = SavedOptLevel; @@ -384,7 +384,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && "-fast-isel-abort > 0 requires -fast-isel"); - const Function &Fn = *mf.getFunction(); + const Function &Fn = mf.getFunction(); MF = &mf; // Reset the target options before resetting the optimization @@ -414,7 +414,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); - CurDAG->init(*MF, *ORE); + CurDAG->init(*MF, *ORE, this); FuncInfo->set(Fn, *MF, CurDAG); // Now get the optional analyzes if we want to. 
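The OptLevelChanger exercised above is a save-in-constructor,
restore-in-destructor guard, so every exit path from ISel puts the
optimization level back. A generic sketch of that RAII shape (SaveRestore is a
made-up name, not the class in SelectionDAGISel.cpp):

#include <cassert>

// Constructor records the old value and installs the new one; the
// destructor restores it, covering early returns automatically.
template <typename T> class SaveRestore {
  T &Slot;
  T Saved;
public:
  SaveRestore(T &S, T NewValue) : Slot(S), Saved(S) { Slot = NewValue; }
  ~SaveRestore() { Slot = Saved; }
};

int main() {
  int OptLevel = 2;
  {
    SaveRestore<int> Scope(OptLevel, 0); // drop to -O0 for this region
    assert(OptLevel == 0);
  }
  assert(OptLevel == 2); // restored on scope exit
}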
@@ -494,10 +494,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) - for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), - E = RegInfo->livein_end(); LI != E; ++LI) - if (LI->second) - LiveInMap.insert(std::make_pair(LI->first, LI->second)); + for (std::pair<unsigned, unsigned> LI : RegInfo->liveins()) + if (LI.second) + LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { @@ -529,12 +528,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const MDNode *Expr = MI->getDebugExpression(); DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); - unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; + if (IsIndirect) + assert(MI->getOperand(1).getImm() == 0 && + "DBG_VALUE with nonzero offset"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, LDI->second, Offset, Variable, Expr); + IsIndirect, LDI->second, Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -556,7 +557,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // declared, rather than whatever is attached to CopyUseMI. MachineInstr *NewMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, - CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); + CopyUseMI->getOperand(0).getReg(), Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -644,6 +645,9 @@ static void reportFastISelFailure(MachineFunction &MF, void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { + // Allow creating illegal types during DAG building for the basic block. + CurDAG->NewNodesMustHaveLegalTypes = false; + // Lower the instructions. If a call is emitted as a tail call, cease emitting // nodes for this block. for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { @@ -726,8 +730,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { BlockName = (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } - DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB) + << " '" << BlockName << "'\n"; + CurDAG->dump()); if (ViewDAGCombine1 && MatchFilterBB) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -739,8 +744,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
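The livein hunk above swaps a hand-rolled livein_iterator loop for a
range-based for over (physical register, virtual register) pairs. The same
shape on a plain container:

#include <cassert>
#include <map>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<unsigned, unsigned>> LiveIns = {{16, 0}, {17, 101}};
  std::map<unsigned, unsigned> LiveInMap;
  for (std::pair<unsigned, unsigned> LI : LiveIns)
    if (LI.second)          // skip liveins without a virtual register
      LiveInMap.insert(LI);
  assert(LiveInMap.size() == 1 && LiveInMap.at(17) == 101);
}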
@@ -754,8 +761,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Only allow creation of legal node types. CurDAG->NewNodesMustHaveLegalTypes = true; @@ -771,8 +780,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); } { @@ -782,8 +793,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } if (Changed) { - DEBUG(dbgs() << "Vector-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); { NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, @@ -791,8 +804,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Vector/type-legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Vector/type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); @@ -804,8 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" - << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); } if (ViewLegalizeDAGs && MatchFilterBB) @@ -817,8 +834,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } - DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombine2 && MatchFilterBB) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -830,8 +849,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -847,8 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber - << " '" << BlockName << "'\n"; CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -915,9 +938,9 @@ public: } // end 
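Each DEBUG line above now formats the block through printMBBReference instead
of splicing "BB#" and a number by hand, so the reference format is defined in
one place. A hypothetical reduction of the idea (this sketch assumes the
"%bb.N" rendering; the helper below is not LLVM's):

#include <iostream>
#include <sstream>

struct Block { int Number; };

// One definition of the block-reference format, reused by every caller.
static std::string printBlockReference(const Block &B) {
  std::ostringstream OS;
  OS << "%bb." << B.Number;
  return OS.str();
}

int main() {
  Block B{3};
  std::cout << "Legalized selection DAG: " << printBlockReference(B) << "\n";
}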
anonymous namespace void SelectionDAGISel::DoInstructionSelection() { - DEBUG(dbgs() << "===== Instruction selection begins: BB#" - << FuncInfo->MBB->getNumber() - << " '" << FuncInfo->MBB->getName() << "'\n"); + DEBUG(dbgs() << "===== Instruction selection begins: " + << printMBBReference(*FuncInfo->MBB) << " '" + << FuncInfo->MBB->getName() << "'\n"); PreprocessISelDAG(); @@ -1138,7 +1161,7 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) { // Look through casts and constant offset GEPs. These mostly come from // inalloca. - APInt Offset(DL.getPointerSizeInBits(0), 0); + APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0); Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); // Check if the variable is a static alloca or a byval or inalloca @@ -1177,12 +1200,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { // For each machine basic block in reverse post order. ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF); - for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator - It = RPOT.begin(), - E = RPOT.end(); - It != E; ++It) { - MachineBasicBlock *MBB = *It; - + for (MachineBasicBlock *MBB : RPOT) { // For each swifterror value in the function. for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) { auto Key = std::make_pair(MBB, SwiftErrorVal); @@ -1253,6 +1271,8 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { // If we don't need a phi create a copy to the upward exposed vreg. if (!needPHI) { assert(UpwardsUse); + assert(!VRegs.empty() && + "No predecessors? Is the Calling Convention correct?"); unsigned DestReg = UUseVReg; BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY), DestReg) @@ -1282,10 +1302,10 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { } } -void preassignSwiftErrorRegs(const TargetLowering *TLI, - FunctionLoweringInfo *FuncInfo, - BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End) { +static void preassignSwiftErrorRegs(const TargetLowering *TLI, + FunctionLoweringInfo *FuncInfo, + BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End) { if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty()) return; @@ -2774,6 +2794,12 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckType(Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout()); return Index; + case SelectionDAGISel::OPC_CheckTypeRes: { + unsigned Res = Table[Index++]; + Result = !::CheckType(Table, Index, N.getValue(Res), SDISel.TLI, + SDISel.CurDAG->getDataLayout()); + return Index; + } case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: case SelectionDAGISel::OPC_CheckChild2Type: @@ -2906,6 +2932,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::CopyFromReg: case ISD::CopyToReg: case ISD::EH_LABEL: + case ISD::ANNOTATION_LABEL: case ISD::LIFETIME_START: case ISD::LIFETIME_END: NodeToMatch->setNodeId(-1); // Mark selected. 
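OPC_CheckTypeRes, added to the predicate-failure scan above (and to
SelectCodeCommon in the next hunk), first decodes one table byte naming which
result of the node to test, then applies the usual type check to
N.getValue(Res). A toy byte-coded matcher showing just that decode step
(opcodes and the Node struct are invented for the sketch):

#include <cassert>
#include <vector>

enum Opcode : unsigned char { OPC_CheckTypeRes = 1 };

struct Node { std::vector<int> ResultTypes; };

static bool checkTypeRes(const std::vector<unsigned char> &Table,
                         unsigned &Index, const Node &N, int WantedType) {
  assert(Table[Index] == OPC_CheckTypeRes);
  ++Index;                       // consume the opcode byte
  unsigned Res = Table[Index++]; // which result of the node to test
  return N.ResultTypes[Res] == WantedType;
}

int main() {
  Node N{{/*res0=*/7, /*res1=*/42}};
  std::vector<unsigned char> Table = {OPC_CheckTypeRes, /*Res=*/1};
  unsigned Index = 0;
  assert(checkTypeRes(Table, Index, N, 42) && Index == 2);
}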
@@ -3175,6 +3202,14 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, break; continue; + case OPC_CheckTypeRes: { + unsigned Res = MatcherTable[MatcherIndex++]; + if (!::CheckType(MatcherTable, MatcherIndex, N.getValue(Res), TLI, + CurDAG->getDataLayout())) + break; + continue; + } + case OPC_SwitchOpcode: { unsigned CurNodeOpcode = N.getOpcode(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; @@ -3548,6 +3583,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, "NodeToMatch was removed partway through selection"); SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N, SDNode *E) { + CurDAG->salvageDebugInfo(*N); auto &Chain = ChainNodesMatched; assert((!E || !is_contained(Chain, N)) && "Chain node replaced during MorphNode"); @@ -3725,6 +3761,25 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, } } +bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { + assert(N->getOpcode() == ISD::OR && "Unexpected opcode"); + auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!C) + return false; + + // Detect when "or" is used to add an offset to a stack object. + if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) { + MachineFrameInfo &MFI = MF->getFrameInfo(); + unsigned A = MFI.getObjectAlignment(FN->getIndex()); + assert(isPowerOf2_32(A) && "Unexpected alignment"); + int32_t Off = C->getSExtValue(); + // If the alleged offset fits in the zero bits guaranteed by + // the alignment, then this or is really an add. + return (Off >= 0) && (((A - 1) & Off) == unsigned(Off)); + } + return false; +} + void SelectionDAGISel::CannotYetSelect(SDNode *N) { std::string msg; raw_string_ostream Msg(msg); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 11561dfa59474..be4ab094bf495 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -16,15 +16,13 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/IR/Constants.h" -#include "llvm/IR/DebugInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; #define DEBUG_TYPE "dag-printer" diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp index 55f70f7d9fd3f..3a283bc5fdc0c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGTargetInfo.cpp - SelectionDAG Info --------------------===// +//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===// // // The LLVM Compiler Infrastructure // @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAGTargetInfo.h" + using namespace llvm; -SelectionDAGTargetInfo::~SelectionDAGTargetInfo() {} +SelectionDAGTargetInfo::~SelectionDAGTargetInfo() = default; diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 5d78bba86d73b..3f64b49e35556 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ 
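isOrEquivalentToAdd above relies on alignment: a frame object aligned to a
power of two A has log2(A) known-zero low address bits, so OR-ing in a
non-negative offset that fits in those bits can never carry and behaves
exactly like an add. The condition, checked stand-alone:

#include <cassert>
#include <cstdint>

// Mirrors the hunk's test: (Off >= 0) && (((A - 1) & Off) == unsigned(Off))
static bool orIsAdd(uint32_t Base, unsigned Align, int32_t Off) {
  if (Off < 0 || ((Align - 1) & uint32_t(Off)) != uint32_t(Off))
    return false;
  assert((Base & (Align - 1)) == 0 && "base must be aligned");
  return true;
}

int main() {
  uint32_t Base = 0x1000;         // a 16-byte aligned stack object
  assert(orIsAdd(Base, 16, 5));
  assert((Base | 5) == Base + 5); // or and add agree for in-range offsets
  assert(!orIsAdd(Base, 16, 16)); // offset spills past the zero bits
}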
b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1,4 +1,4 @@ -//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===// +//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===// // // The LLVM Compiler Infrastructure // @@ -14,21 +14,44 @@ #include "StatepointLowering.h" #include "SelectionDAGBuilder.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineValueType.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/CallingConv.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Statepoint.h" -#include "llvm/Target/TargetLowering.h" -#include <algorithm> +#include "llvm/IR/Type.h" +#include "llvm/Support/Casting.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include <cassert> +#include <cstddef> +#include <cstdint> +#include <iterator> +#include <tuple> +#include <utility> + using namespace llvm; #define DEBUG_TYPE "statepoint-lowering" @@ -73,7 +96,7 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, NumSlotsAllocatedForStatepoints++; MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); - unsigned SpillSize = ValueType.getSizeInBits() / 8; + unsigned SpillSize = ValueType.getStoreSize(); assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?"); // First look for a previously created stack slot which is not in @@ -200,7 +223,6 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, /// values on the stack between calls. 
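The SpillSize change above switches from getSizeInBits() / 8, which
truncates, to getStoreSize(), which rounds up to whole bytes; the assert in
the hunk still requires a byte-multiple type at this call site. The
difference in plain arithmetic:

#include <cassert>

static unsigned storeSize(unsigned Bits) { return (Bits + 7) / 8; }

int main() {
  assert(storeSize(32) == 32 / 8); // byte-multiple types: identical
  assert(storeSize(17) == 3);      // rounded up to whole bytes
  assert(17 / 8 == 2);             // plain division would under-allocate
}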
static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { - SDValue Incoming = Builder.getValue(IncomingValue); if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) { @@ -292,7 +314,6 @@ removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases, static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( SelectionDAGBuilder::StatepointLoweringInfo &SI, SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { - SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerInvokable(SI.CLI, SI.EHPadBB); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h index b043184003a09..372c82a359f6b 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -1,4 +1,4 @@ -//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===// +//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -16,11 +16,16 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" -#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/ValueTypes.h" +#include <cassert> namespace llvm { + +class CallInst; class SelectionDAGBuilder; /// This class tracks both per-statepoint and per-selectiondag information. @@ -30,7 +35,7 @@ class SelectionDAGBuilder; /// works in concert with information in FunctionLoweringInfo. class StatepointLoweringState { public: - StatepointLoweringState() : NextSlotToAllocate(0) {} + StatepointLoweringState() = default; /// Reset all state tracking for a newly encountered safepoint. Also /// performs some consistency checking. @@ -69,7 +74,7 @@ public: /// before the next statepoint. If we weren't expecting to see /// it, we'll report an assertion. 
void relocCallVisited(const CallInst &RelocCall) { - auto I = find(PendingGCRelocateCalls, &RelocCall); + auto I = llvm::find(PendingGCRelocateCalls, &RelocCall); assert(I != PendingGCRelocateCalls.end() && "Visited unexpected gcrelocate call"); PendingGCRelocateCalls.erase(I); @@ -108,11 +113,12 @@ private: SmallBitVector AllocatedStackSlots; /// Points just beyond the last slot known to have been allocated - unsigned NextSlotToAllocate; + unsigned NextSlotToAllocate = 0; /// Keep track of pending gcrelocate calls for consistency check SmallVector<const CallInst *, 10> PendingGCRelocateCalls; }; + } // end namespace llvm #endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8652df7bbd706..58276052c10bd 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Target/TargetLowering.h" +#include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" @@ -20,6 +20,9 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/TargetLoweringObjectFile.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -29,10 +32,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include <cctype> using namespace llvm; @@ -52,11 +52,11 @@ bool TargetLowering::isPositionIndependent() const { /// so, it sets Chain to the input chain of the tail call. bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const { - const Function *F = DAG.getMachineFunction().getFunction(); + const Function &F = DAG.getMachineFunction().getFunction(); // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. - AttributeList CallerAttrs = F->getAttributes(); + AttributeList CallerAttrs = F.getAttributes(); if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) .removeAttribute(Attribute::NoAlias) .hasAttributes()) @@ -408,7 +408,7 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, // Search for the smallest integer type with free casts to and from // Op's type. For expedience, just check power-of-2 integer types. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros(); + unsigned DemandedSize = Demanded.getActiveBits(); unsigned SmallVTBits = DemandedSize; if (!isPowerOf2_32(SmallVTBits)) SmallVTBits = NextPowerOf2(SmallVTBits); @@ -421,9 +421,8 @@ bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, Op.getOpcode(), dl, SmallVT, DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)), DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1))); - bool NeedZext = DemandedSize > SmallVTBits; - SDValue Z = DAG.getNode(NeedZext ? 
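In ShrinkDemandedOp above, APInt::getActiveBits() replaces the equivalent
BitWidth - countLeadingZeros() spelling: both yield the index of the highest
set bit plus one. Demonstrated on a plain 32-bit mask (__builtin_clz is the
GCC/Clang builtin standing in for countLeadingZeros):

#include <cassert>
#include <cstdint>

// Active bits = number of bits needed to represent the value.
static unsigned activeBits(uint32_t V) {
  unsigned N = 0;
  while (V) { ++N; V >>= 1; }
  return N;
}

int main() {
  uint32_t Demanded = 0x00000FFF;             // low 12 bits demanded
  assert(activeBits(Demanded) == 12);
  assert(32 - __builtin_clz(Demanded) == 12); // the old spelling
}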
ISD::ZERO_EXTEND : ISD::ANY_EXTEND, - dl, Op.getValueType(), X); + assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?"); + SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X); return TLO.CombineTo(Op, Z); } } @@ -459,7 +458,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, // If Old has more than one use then it must be Op, because the // AssumeSingleUse flag is not propogated to recursive calls of // SimplifyDemanded bits, so the only node with multiple use that - // it will attempt to combine will be opt. + // it will attempt to combine will be Op. assert(TLO.Old == Op); SmallVector <SDValue, 4> NewOps; @@ -470,7 +469,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, } NewOps.push_back(User->getOperand(i)); } - TLO.DAG.UpdateNodeOperands(User, NewOps); + User = TLO.DAG.UpdateNodeOperands(User, NewOps); // Op has less users now, so we may be able to perform additional combines // with it. DCI.AddToWorklist(Op.getNode()); @@ -480,7 +479,7 @@ TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx, return true; } -bool TargetLowering::SimplifyDemandedBits(SDValue Op, APInt &DemandedMask, +bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -517,6 +516,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // Don't know anything. Known = KnownBits(BitWidth); + if (Op.getOpcode() == ISD::Constant) { + // We know all of the bits for a constant! + Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); + Known.Zero = ~Known.One; + return false; + } + // Other users may use these bits. if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { @@ -539,11 +545,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownBits Known2, KnownOut; switch (Op.getOpcode()) { - case ISD::Constant: - // We know all of the bits for a constant! - Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); - Known.Zero = ~Known.One; - return false; // Don't fall through, will infinitely loop. case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every constant vector element. Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -780,33 +781,38 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::SHL: - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - unsigned ShAmt = SA->getZExtValue(); + if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { SDValue InOp = Op.getOperand(0); // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) + if (SA->getAPIntValue().uge(BitWidth)) break; + unsigned ShAmt = SA->getZExtValue(); + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a // single shift. We can do this if the bottom bits (which are shifted // out) are never demanded. 
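The ISD::Constant case hoisted to the top of SimplifyDemandedBits above: for
a constant every bit is known, so Known.One is the value and Known.Zero its
complement, and the function can return before the multiple-use bookkeeping
that the old fall-through position had to avoid. The invariants, in plain
integers:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t C = 0x0000F0F0;               // the "constant node"
  uint32_t KnownOne = C, KnownZero = ~C;
  assert((KnownOne & KnownZero) == 0);   // no bit both one and zero
  assert((KnownOne | KnownZero) == ~0u); // every bit accounted for
}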
- if (InOp.getOpcode() == ISD::SRL && - isa<ConstantSDNode>(InOp.getOperand(1))) { - if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { - unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); - unsigned Opc = ISD::SHL; - int Diff = ShAmt-C1; - if (Diff < 0) { - Diff = -Diff; - Opc = ISD::SRL; - } + if (InOp.getOpcode() == ISD::SRL) { + if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (SA2->getAPIntValue().ult(BitWidth)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } - SDValue NewSA = - TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); - return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, - InOp.getOperand(0), NewSA)); + SDValue NewSA = + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), + NewSA)); + } + } } } @@ -818,7 +824,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = InOp.getOperand(0); EVT InnerVT = InnerOp.getValueType(); - unsigned InnerBits = InnerVT.getSizeInBits(); + unsigned InnerBits = InnerVT.getScalarSizeInBits(); if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits && isTypeDesirableForOp(ISD::SHL, InnerVT)) { EVT ShTy = getShiftAmountTy(InnerVT, DL); @@ -837,45 +843,42 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // (shl (anyext x), c2-c1). This requires that the bottom c1 bits // aren't demanded (as above) and that the shifted upper c1 bits of // x aren't demanded. - if (InOp.hasOneUse() && - InnerOp.getOpcode() == ISD::SRL && - InnerOp.hasOneUse() && - isa<ConstantSDNode>(InnerOp.getOperand(1))) { - unsigned InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) - ->getZExtValue(); - if (InnerShAmt < ShAmt && - InnerShAmt < InnerBits && - NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && - NewMask.countTrailingZeros() >= ShAmt) { - SDValue NewSA = - TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, - Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); - SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, - InnerOp.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, - NewExt, NewSA)); + if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && + InnerOp.hasOneUse()) { + if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) { + unsigned InnerShAmt = SA2->getLimitedValue(InnerBits); + if (InnerShAmt < ShAmt && + InnerShAmt < InnerBits && + NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && + NewMask.countTrailingZeros() >= ShAmt) { + SDValue NewSA = + TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, + Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, + InnerOp.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, + NewExt, NewSA)); + } } } } - Known.Zero <<= SA->getZExtValue(); - Known.One <<= SA->getZExtValue(); + Known.Zero <<= ShAmt; + Known.One <<= ShAmt; // low bits known zero. 
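The fold rewritten above turns (x >>u C1) << C2 into a single shift by
|C2 - C1| when the bits lost to the first shift are not demanded. With the
low bits of x already zero (so nothing is demanded of them), the identity is
exact:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0xABCD0000;   // low 16 bits already zero
  unsigned C1 = 4, C2 = 7;   // C2 > C1: net left shift
  assert(((X >> C1) << C2) == (X << (C2 - C1)));
  unsigned D1 = 9, D2 = 3;   // D2 < D1: net right shift
  assert(((X >> D1) << D2) == (X >> (D1 - D2)));
}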
- Known.Zero.setLowBits(SA->getZExtValue()); + Known.Zero.setLowBits(ShAmt); } break; case ISD::SRL: - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - EVT VT = Op.getValueType(); - unsigned ShAmt = SA->getZExtValue(); - unsigned VTSize = VT.getSizeInBits(); + if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { SDValue InOp = Op.getOperand(0); // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) + if (SA->getAPIntValue().uge(BitWidth)) break; + unsigned ShAmt = SA->getZExtValue(); APInt InDemandedMask = (NewMask << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that @@ -886,21 +889,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a // single shift. We can do this if the top bits (which are shifted out) // are never demanded. - if (InOp.getOpcode() == ISD::SHL && - isa<ConstantSDNode>(InOp.getOperand(1))) { - if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { - unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); - unsigned Opc = ISD::SRL; - int Diff = ShAmt-C1; - if (Diff < 0) { - Diff = -Diff; - Opc = ISD::SHL; - } + if (InOp.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) { + if (ShAmt && + (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + if (SA2->getAPIntValue().ult(BitWidth)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } - SDValue NewSA = - TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, - InOp.getOperand(0), NewSA)); + SDValue NewSA = + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), + NewSA)); + } + } } } @@ -924,14 +933,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op.getOperand(0), Op.getOperand(1))); - if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { EVT VT = Op.getValueType(); - unsigned ShAmt = SA->getZExtValue(); // If the shift count is an invalid immediate, don't do anything. - if (ShAmt >= BitWidth) + if (SA->getAPIntValue().uge(BitWidth)) break; + unsigned ShAmt = SA->getZExtValue(); APInt InDemandedMask = (NewMask << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that @@ -979,15 +988,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; case ISD::SIGN_EXTEND_INREG: { EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned ExVTBits = ExVT.getScalarSizeInBits(); - APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); // If we only care about the highest bit, don't bother shifting right. - if (MsbMask == NewMask) { - unsigned ShAmt = ExVT.getScalarSizeInBits(); + if (NewMask.isSignMask()) { SDValue InOp = Op.getOperand(0); - unsigned VTBits = Op->getValueType(0).getScalarSizeInBits(); bool AlreadySignExtended = - TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. 
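The shift hunks above replace dyn_cast<ConstantSDNode> with
isConstOrConstSplat, which also matches a vector shift whose lanes all hold
the same constant, so these folds now fire for uniform vector shift amounts
as well. A toy version of that matcher:

#include <cassert>
#include <optional>
#include <vector>

// Returns the single constant if all lanes agree, like a splat vector.
static std::optional<int> constOrConstSplat(const std::vector<int> &Lanes) {
  if (Lanes.empty())
    return std::nullopt;
  for (int L : Lanes)
    if (L != Lanes[0])
      return std::nullopt; // not a uniform splat
  return Lanes[0];
}

int main() {
  assert(constOrConstSplat({3, 3, 3, 3}) == 3); // splat: fold applies
  assert(!constOrConstSplat({3, 1, 3, 3}));     // mixed: no single amount
}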
if (!AlreadySignExtended) { @@ -997,7 +1004,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); - SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, Op.getValueType(), InOp, @@ -1005,26 +1012,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } } - // Sign extension. Compute the demanded bits in the result that are not - // present in the input. - APInt NewBits = - APInt::getHighBitsSet(BitWidth, - BitWidth - ExVT.getScalarSizeInBits()); - // If none of the extended bits are demanded, eliminate the sextinreg. - if ((NewBits & NewMask) == 0) + if (NewMask.getActiveBits() <= ExVTBits) return TLO.CombineTo(Op, Op.getOperand(0)); - APInt InSignBit = - APInt::getSignMask(ExVT.getScalarSizeInBits()).zext(BitWidth); - APInt InputDemandedBits = - APInt::getLowBitsSet(BitWidth, - ExVT.getScalarSizeInBits()) & - NewMask; + APInt InputDemandedBits = NewMask.getLoBits(ExVTBits); // Since the sign extended bits are demanded, we know that the sign // bit is demanded. - InputDemandedBits |= InSignBit; + InputDemandedBits.setBit(ExVTBits - 1); if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, Known, TLO, Depth+1)) @@ -1035,16 +1031,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // top bits of the result. // If the input sign bit is known zero, convert this into a zero extension. - if (Known.Zero.intersects(InSignBit)) + if (Known.Zero[ExVTBits - 1]) return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg( Op.getOperand(0), dl, ExVT.getScalarType())); - if (Known.One.intersects(InSignBit)) { // Input sign bit known set - Known.One |= NewBits; - Known.Zero &= ~NewBits; + APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits); + if (Known.One[ExVTBits - 1]) { // Input sign bit known set + Known.One.setBitsFrom(ExVTBits); + Known.Zero &= Mask; } else { // Input sign bit unknown - Known.Zero &= ~NewBits; - Known.One &= ~NewBits; + Known.Zero &= Mask; + Known.One &= Mask; } break; } @@ -1072,61 +1069,47 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } case ISD::ZERO_EXTEND: { unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); - APInt InMask = NewMask.trunc(OperandBitWidth); // If none of the top bits are demanded, convert this into an any_extend. - APInt NewBits = - APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; - if (!NewBits.intersects(NewMask)) + if (NewMask.getActiveBits() <= OperandBitWidth) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); + APInt InMask = NewMask.trunc(OperandBitWidth); if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known = Known.zext(BitWidth); - Known.Zero |= NewBits; + Known.Zero.setBitsFrom(OperandBitWidth); break; } case ISD::SIGN_EXTEND: { - EVT InVT = Op.getOperand(0).getValueType(); - unsigned InBits = InVT.getScalarSizeInBits(); - APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); - APInt InSignBit = APInt::getOneBitSet(BitWidth, InBits - 1); - APInt NewBits = ~InMask & NewMask; + unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits(); // If none of the top bits are demanded, convert this into an any_extend. 
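The SIGN_EXTEND_INREG rewrite above keys everything off ExVTBits: the
operation rewrites only bits at and above that position, so when
NewMask.getActiveBits() <= ExVTBits the node can be dropped entirely. Its
semantics on a plain 32-bit value (the right shift on a negative value is the
usual implementation-defined sign-propagating shift):

#include <cassert>
#include <cstdint>

// Sign-extend the low FromBits of X into the full 32 bits.
static int32_t signExtendInReg(int32_t X, unsigned FromBits) {
  unsigned Shift = 32 - FromBits;
  return int32_t(uint32_t(X) << Shift) >> Shift;
}

int main() {
  int32_t X = 0x1234ABCD;
  int32_t S = signExtendInReg(X, 16);
  assert((uint32_t(S) & 0xFFFF) == (uint32_t(X) & 0xFFFF)); // low bits intact
  assert(S == int32_t(0xFFFFABCD)); // upper bits are copies of bit 15
}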
- if (NewBits == 0) + if (NewMask.getActiveBits() <= InBits) return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. - APInt InDemandedBits = InMask & NewMask; - InDemandedBits |= InSignBit; - InDemandedBits = InDemandedBits.trunc(InBits); + APInt InDemandedBits = NewMask.trunc(InBits); + InDemandedBits.setBit(InBits - 1); if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO, Depth+1)) return true; - Known = Known.zext(BitWidth); + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + // If the sign bit is known one, the top bits match. + Known = Known.sext(BitWidth); // If the sign bit is known zero, convert this to a zero extend. - if (Known.Zero.intersects(InSignBit)) + if (Known.isNonNegative()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Op.getOperand(0))); - - // If the sign bit is known one, the top bits match. - if (Known.One.intersects(InSignBit)) { - Known.One |= NewBits; - assert((Known.Zero & NewBits) == 0); - } else { // Otherwise, top bits aren't known. - assert((Known.One & NewBits) == 0); - assert((Known.Zero & NewBits) == 0); - } break; } case ISD::ANY_EXTEND: { @@ -1305,6 +1288,19 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, + KnownBits &Known, + const APInt &DemandedElts, + const SelectionDAG &DAG, + unsigned Depth) const { + assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex"); + + if (unsigned Align = DAG.InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2_32(Align)); + } +} + /// This method can be implemented by targets that want to expose additional /// information about sign bits to the DAG Combiner. unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, @@ -2967,7 +2963,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, std::vector<SDNode *> *Created) const { - AttributeList Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.isIntDivCheap(N->getValueType(0), Attr)) return SDValue(N,0); // Lower SDIV as SDIV @@ -3436,8 +3432,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // The type of data as saved in memory. EVT MemSclVT = StVT.getScalarType(); - EVT PtrVT = BasePtr.getValueType(); - // Store Stride in bytes unsigned Stride = MemSclVT.getSizeInBits() / 8; EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); @@ -3450,8 +3444,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, DAG.getConstant(Idx, SL, IdxVT)); - SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, - DAG.getConstant(Idx * Stride, SL, PtrVT)); + SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride); // This scalar TruncStore may be illegal, but we legalize it later. 
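computeKnownBitsForFrameIndex above reports the low Log2(Align) bits of an
aligned frame address as known zero. The same property on a raw address
(__builtin_ctz is the GCC/Clang builtin; for a power of two it is log2):

#include <cassert>
#include <cstdint>

int main() {
  unsigned Align = 16;             // alignment of the frame object
  uint64_t Addr = 0x7FFF0040;      // any 16-byte aligned address
  unsigned KnownZeroLowBits = __builtin_ctz(Align);
  assert(KnownZeroLowBits == 4);
  assert((Addr & ((1u << KnownZeroLowBits) - 1)) == 0); // low bits are zero
}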
SDValue Store = DAG.getTruncStore( @@ -3474,6 +3467,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { EVT VT = LD->getValueType(0); EVT LoadedVT = LD->getMemoryVT(); SDLoc dl(LD); + auto &MF = DAG.getMachineFunction(); + if (VT.isFloatingPoint() || VT.isVector()) { EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) { @@ -3498,13 +3493,13 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Copy the value to a (aligned) stack slot using (unaligned) integer // loads and stores, then do a (aligned) load from the stack slot. MVT RegVT = getRegisterType(*DAG.getContext(), intVT); - unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned LoadedBytes = LoadedVT.getStoreSize(); unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; // Make sure the stack slot is also aligned for the register type. SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); - + auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex(); SmallVector<SDValue, 8> Stores; SDValue StackPtr = StackBase; unsigned Offset = 0; @@ -3523,13 +3518,14 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. - Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo())); + Stores.push_back(DAG.getStore( + Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset))); // Increment the pointers. Offset += RegBytes; - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, StackPtr, - StackPtrIncrement); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement); + StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement); } // The last copy may be partial. Do an extending load. @@ -3543,15 +3539,17 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. - Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, - MachinePointerInfo(), MemVT)); + Stores.push_back(DAG.getTruncStore( + Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT)); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); // Finally, perform the original load only redirected to the stack slot. Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT); + MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), + LoadedVT); // Callers expect a MERGE_VALUES node. 
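expandUnalignedLoad above bounces the value through an aligned stack
temporary: piecewise register-width copies that tolerate the bad alignment,
then one naturally aligned load from the slot (now tagged with a
getFixedStack pointer info). The same shape with memcpy standing in for the
per-register traffic, little-endian host assumed:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Raw[8] = {};
  unsigned char *Unaligned = Raw + 1;   // deliberately misaligned source
  Unaligned[0] = 0x78; Unaligned[1] = 0x56;
  Unaligned[2] = 0x34; Unaligned[3] = 0x12;

  alignas(4) unsigned char Slot[4];     // the "stack temporary"
  std::memcpy(Slot, Unaligned, sizeof(Slot)); // piecewise copy, alignment-safe
  uint32_t V;
  std::memcpy(&V, Slot, sizeof(V));     // natural-alignment reload of the slot
  assert(V == 0x12345678);              // little-endian interpretation
}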
return std::make_pair(Load, TF); @@ -3581,8 +3579,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, MinAlign(Alignment, IncrementSize), @@ -3591,8 +3589,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, MinAlign(Alignment, IncrementSize), @@ -3621,6 +3619,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); int Alignment = ST->getAlignment(); + auto &MF = DAG.getMachineFunction(); SDLoc dl(ST); if (ST->getMemoryVT().isFloatingPoint() || @@ -3649,16 +3648,18 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); EVT PtrVT = Ptr.getValueType(); - unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned StoredBytes = StoredVT.getStoreSize(); unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; // Make sure the stack slot is also aligned for the register type. SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); // Perform the original store, only redirected to the stack slot. - SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr, - MachinePointerInfo(), StoredVT); + SDValue Store = DAG.getTruncStore( + Chain, dl, Val, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT); EVT StackPtrVT = StackPtr.getValueType(); @@ -3670,8 +3671,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, // Do all but one copies using the full register width. for (unsigned i = 1; i < NumRegs; i++) { // Load one integer register's worth from the stack slot. - SDValue Load = - DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo()); + SDValue Load = DAG.getLoad( + RegVT, dl, Store, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), @@ -3679,9 +3681,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, ST->getMemOperand()->getFlags())); // Increment the pointers. Offset += RegBytes; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtrVT, - StackPtr, StackPtrIncrement); - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, PtrIncrement); + StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement); } // The last store may be partial. Do a truncating store. 
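The final branch of expandUnalignedStore above splits a misaligned store into
Lo and Hi halves, with getObjectPtrOffset advancing the pointer by
IncrementSize and the data layout's endianness deciding which half goes
first. The little-endian case stand-alone:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Buf[6] = {};
  uint32_t Val = 0x12345678;
  uint16_t Lo = uint16_t(Val), Hi = uint16_t(Val >> 16);

  std::memcpy(Buf + 0, &Lo, 2); // first half at Ptr
  std::memcpy(Buf + 2, &Hi, 2); // second half at Ptr + IncrementSize

  uint32_t Back;
  std::memcpy(&Back, Buf, 4);
  assert(Back == Val);          // little-endian host assumed
}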
On big-endian @@ -3691,8 +3692,9 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, 8 * (StoredBytes - Offset)); // Load from the stack slot. - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, - MachinePointerInfo(), MemVT); + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT); Stores.push_back( DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, @@ -3726,9 +3728,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, Ptr, ST->getPointerInfo(), NewStoredVT, Alignment, ST->getMemOperand()->getFlags()); - EVT PtrVT = Ptr.getValueType(); - Ptr = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, - DAG.getConstant(IncrementSize, dl, PtrVT)); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); Alignment = MinAlign(Alignment, IncrementSize); Store2 = DAG.getTruncStore( Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, @@ -3767,7 +3767,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, AddrVT); Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); } else - Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT); + Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment); } @@ -3797,7 +3797,7 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, SDValue Index) const { SDLoc dl(Index); // Make sure the index type is big enough to compute in. - Index = DAG.getZExtOrTrunc(Index, dl, getPointerTy(DAG.getDataLayout())); + Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType()); EVT EltVT = VecVT.getVectorElementType(); |
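getVectorElementPointer above now widens the index to the pointer's own type
before scaling, so the multiply by the element stride cannot truncate. The
address computation in plain integers (sizes are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Base = 0x2000;      // vector in memory
  uint32_t Index = 5;          // index arrives in a narrower type
  unsigned EltStoreSize = 4;   // e.g. one lane of a v8i32
  // Zero-extend the index first, then scale in pointer width.
  uint64_t ElementPtr = Base + uint64_t(Index) * EltStoreSize;
  assert(ElementPtr == 0x2014);
}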