diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
29 files changed, 5611 insertions, 2798 deletions
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 75e816720f57..fbedf2c1d17a 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAGDumper.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp + StatepointLowering.cpp ScheduleDAGVLIW.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2abcdd524512..5145731f6231 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17,6 +17,8 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -32,7 +34,6 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -76,6 +77,10 @@ namespace { "slicing"), cl::init(false)); + static cl::opt<bool> + MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), + cl::desc("DAG combiner may split indexing from loads")); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -87,61 +92,70 @@ namespace { bool LegalTypes; bool ForCodeSize; - // Worklist of all of the nodes that need to be simplified. - // - // This has the semantics that when adding to the worklist, - // the item added must be next to be processed. It should - // also only appear once. The naive approach to this takes - // linear time. 
- // - // To reduce the insert/remove time to logarithmic, we use - // a set and a vector to maintain our worklist. - // - // The set contains the items on the worklist, but does not - // maintain the order they should be visited. - // - // The vector maintains the order nodes should be visited, but may - // contain duplicate or removed nodes. When choosing a node to - // visit, we pop off the order stack until we find an item that is - // also in the contents set. All operations are O(log N). - SmallPtrSet<SDNode*, 64> WorklistContents; - SmallVector<SDNode*, 64> WorklistOrder; + /// \brief Worklist of all of the nodes that need to be simplified. + /// + /// This must behave as a stack -- new nodes to process are pushed onto the + /// back and when processing we pop off of the back. + /// + /// The worklist will not contain duplicates but may contain null entries + /// due to nodes being deleted from the underlying DAG. + SmallVector<SDNode *, 64> Worklist; + + /// \brief Mapping from an SDNode to its position on the worklist. + /// + /// This is used to find and remove nodes from the worklist (by nulling + /// them) when they are deleted from the underlying DAG. It relies on + /// stable indices of nodes within the worklist. + DenseMap<SDNode *, unsigned> WorklistMap; + + /// \brief Set of nodes which have been combined (at least once). + /// + /// This is used to allow us to reliably add any operands of a DAG node + /// which have not yet been combined to the worklist. + SmallPtrSet<SDNode *, 64> CombinedNodes; // AA - Used for DAG load/store alias analysis. AliasAnalysis &AA; - /// AddUsersToWorklist - When an instruction is simplified, add all users of - /// the instruction to the work lists because they might get more simplified - /// now. - /// + /// When an instruction is simplified, add all users of the instruction to + /// the work lists because they might get more simplified now. 
void AddUsersToWorklist(SDNode *N) { for (SDNode *Node : N->uses()) AddToWorklist(Node); } - /// visit - call the node-specific routine that knows how to fold each - /// particular type of node. + /// Call the node-specific routine that folds each particular type of node. SDValue visit(SDNode *N); public: - /// AddToWorklist - Add to the work list making sure its instance is at the - /// back (next to be processed.) + /// Add to the worklist making sure its instance is at the back (next to be + /// processed.) void AddToWorklist(SDNode *N) { // Skip handle nodes as they can't usefully be combined and confuse the // zero-use deletion strategy. if (N->getOpcode() == ISD::HANDLENODE) return; - WorklistContents.insert(N); - WorklistOrder.push_back(N); + if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) + Worklist.push_back(N); } - /// removeFromWorklist - remove all instances of N from the worklist. - /// + /// Remove all instances of N from the worklist. void removeFromWorklist(SDNode *N) { - WorklistContents.erase(N); + CombinedNodes.erase(N); + + auto It = WorklistMap.find(N); + if (It == WorklistMap.end()) + return; // Not in the worklist. + + // Null out the entry rather than erasing it to avoid a linear operation. + Worklist[It->second] = nullptr; + WorklistMap.erase(It); } + void deleteAndRecombine(SDNode *N); + bool recursivelyDeleteUnusedNodes(SDNode *N); + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, bool AddTo = true); @@ -159,9 +173,9 @@ namespace { private: - /// SimplifyDemandedBits - Check the specified integer node value to see if - /// it can be simplified or if things it uses can be simplified by bit - /// propagation. If so, return true. + /// Check the specified integer node value to see if it can be simplified or + /// if things it uses can be simplified by bit propagation. + /// If so, return true. 
bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); APInt Demanded = APInt::getAllOnesValue(BitWidth); @@ -172,8 +186,19 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); + /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed + /// load. + /// + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. + /// \param InVecVT type of the input vector to EVE with bitcasts resolved. + /// \param EltNo index of the vector element to load. + /// \param OriginalLoad load that EVE came from to be replaced. + /// \returns EVE on success SDValue() on failure. + SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); @@ -187,7 +212,7 @@ namespace { SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); - /// combine - call the node-specific routine that knows how to fold each + /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. 
SDValue combine(SDNode *N); @@ -251,6 +276,7 @@ namespace { SDValue visitFMA(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); + SDValue visitFSQRT(SDNode *N); SDValue visitFCOPYSIGN(SDNode *N); SDValue visitSINT_TO_FP(SDNode *N); SDValue visitUINT_TO_FP(SDNode *N); @@ -264,6 +290,8 @@ namespace { SDValue visitFCEIL(SDNode *N); SDValue visitFTRUNC(SDNode *N); SDValue visitFFLOOR(SDNode *N); + SDValue visitFMINNUM(SDNode *N); + SDValue visitFMAXNUM(SDNode *N); SDValue visitBRCOND(SDNode *N); SDValue visitBR_CC(SDNode *N); SDValue visitLOAD(SDNode *N); @@ -275,6 +303,8 @@ namespace { SDValue visitEXTRACT_SUBVECTOR(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue visitINSERT_SUBVECTOR(SDNode *N); + SDValue visitMLOAD(SDNode *N); + SDValue visitMSTORE(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); @@ -299,7 +329,12 @@ namespace { SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); + SDValue BuildSDIVPow2(SDNode *N); SDValue BuildUDIV(SDNode *N); + SDValue BuildReciprocalEstimate(SDValue Op); + SDValue BuildRsqrtEstimate(SDValue Op); + SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); + SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations); SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); @@ -316,17 +351,16 @@ namespace { SDValue GetDemandedBits(SDValue V, const APInt &Mask); - /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, + /// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. 
void GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl<SDValue> &Aliases); - /// isAlias - Return true if there is any possibility that the two addresses - /// overlap. + /// Return true if there is any possibility that the two addresses overlap. bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; - /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, - /// looking for a better chain (aliasing node.) + /// Walk up chain skipping non-aliasing memory nodes, looking for a better + /// chain (aliasing node.) SDValue FindBetterChain(SDNode *N, SDValue Chain); /// Merge consecutive store operations into a wide store. @@ -354,13 +388,13 @@ namespace { FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); } - /// Run - runs the dag combiner on all nodes in the work list + /// Runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); SelectionDAG &getDAG() const { return DAG; } - /// getShiftAmountTy - Returns a type large enough to hold any valid - /// shift amount - before type legalization these can be huge. + /// Returns a type large enough to hold any valid shift amount - before type + /// legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); if (LHSTy.isVector()) @@ -369,15 +403,14 @@ namespace { : TLI.getPointerTy(); } - /// isTypeLegal - This method returns true if we are running before type - /// legalization or if the specified VT is legal. + /// This method returns true if we are running before type legalization or + /// if the specified VT is legal. 
bool isTypeLegal(const EVT &VT) { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } - /// getSetCCResultType - Convenience wrapper around - /// TargetLowering::getSetCCResultType + /// Convenience wrapper around TargetLowering::getSetCCResultType EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(*DAG.getContext(), VT); } @@ -386,7 +419,7 @@ namespace { namespace { -/// WorklistRemover - This class is a DAGUpdateListener that removes any deleted +/// This class is a DAGUpdateListener that removes any deleted /// nodes from the worklist. class WorklistRemover : public SelectionDAG::DAGUpdateListener { DAGCombiner &DC; @@ -437,9 +470,24 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Helper Functions //===----------------------------------------------------------------------===// -/// isNegatibleForFree - Return 1 if we can compute the negated form of the -/// specified expression for the same cost as the expression itself, or 2 if we -/// can compute the negated form more cheaply than the expression itself. +void DAGCombiner::deleteAndRecombine(SDNode *N) { + removeFromWorklist(N); + + // If the operands of this node are only used by the node, they will now be + // dead. Make sure to re-visit them and recursively delete dead nodes. + for (const SDValue &Op : N->ops()) + // For an operand generating multiple values, one of the values may + // become dead allowing further simplification (e.g. split index + // arithmetic from an indexed load). + if (Op->hasOneUse() || Op->getNumValues() > 1) + AddToWorklist(Op.getNode()); + + DAG.DeleteNode(N); +} + +/// Return 1 if we can compute the negated form of the specified expression for +/// the same cost as the expression itself, or 2 if we can compute the negated +/// form more cheaply than the expression itself. 
static char isNegatibleForFree(SDValue Op, bool LegalOperations, const TargetLowering &TLI, const TargetOptions *Options, @@ -502,10 +550,10 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations, } } -/// GetNegatedExpression - If isNegatibleForFree returns true, this function -/// returns the newly negated expression. +/// If isNegatibleForFree returns true, return the newly negated expression. static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOperations, unsigned Depth = 0) { + const TargetOptions &Options = DAG.getTarget().Options; // fneg is removable even if it has multiple uses. if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); @@ -522,12 +570,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } case ISD::FADD: // FIXME: determine better conditions for this xform. - assert(DAG.getTarget().Options.UnsafeFPMath); + assert(Options.UnsafeFPMath); // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, Depth+1)) + DAG.getTargetLoweringInfo(), &Options, Depth+1)) return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -539,7 +586,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, Op.getOperand(0)); case ISD::FSUB: // We can't turn -(A-B) into B-A when we honor signed zeros. 
- assert(DAG.getTarget().Options.UnsafeFPMath); + assert(Options.UnsafeFPMath); // fold (fneg (fsub 0, B)) -> B if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) @@ -552,12 +599,11 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, case ISD::FMUL: case ISD::FDIV: - assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); + assert(!Options.HonorSignDependentRoundingFPMath()); // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) if (isNegatibleForFree(Op.getOperand(0), LegalOperations, - DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, Depth+1)) + DAG.getTargetLoweringInfo(), &Options, Depth+1)) return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), @@ -582,7 +628,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, } } -// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc +// Return true if this node is a setcc, or is a select_cc // that selects between the target values used for true and false, making it // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to // the appropriate nodes based on the type of node we are checking. This @@ -601,15 +647,19 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, !TLI.isConstFalseVal(N.getOperand(3).getNode())) return false; + if (TLI.getBooleanContents(N.getValueType()) == + TargetLowering::UndefinedBooleanContent) + return false; + LHS = N.getOperand(0); RHS = N.getOperand(1); CC = N.getOperand(4); return true; } -// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only -// one use. If this is true, it allows the users to invert the operation for -// free when it is profitable to do so. +/// Return true if this is a SetCC-equivalent operation with only one use. +/// If this is true, it allows the users to invert the operation for free when +/// it is profitable to do so. 
bool DAGCombiner::isOneUseSetCC(SDValue N) const { SDValue N0, N1, N2; if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) @@ -617,7 +667,7 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const { return false; } -/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose +/// Returns true if N is a BUILD_VECTOR node whose /// elements are all the same constant or undefined. static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); @@ -638,7 +688,7 @@ static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) return N.getNode(); BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); - if(BV && BV->isConstant()) + if (BV && BV->isConstant()) return BV; return nullptr; } @@ -664,6 +714,23 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) { return nullptr; } +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// float. +static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); + + if (CN && UndefElements.none()) + return CN; + } + + return nullptr; +} + SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); @@ -671,10 +738,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) - SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + return SDValue(); } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one @@ -692,10 +758,9 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) - SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + return SDValue(); } if (N1.hasOneUse()) { // reassoc. 
(op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one @@ -720,11 +785,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, N->dump(&DAG); dbgs() << "\nWith: "; To[0].getNode()->dump(&DAG); - dbgs() << " and " << NumTo-1 << " other values\n"; - for (unsigned i = 0, e = NumTo; i != e; ++i) - assert((!To[i].getNode() || - N->getValueType(i) == To[i].getValueType()) && - "Cannot combine value to value of different type!")); + dbgs() << " and " << NumTo-1 << " other values\n"); + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && + "Cannot combine value to value of different type!"); + WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesWith(N, To); if (AddTo) { @@ -740,14 +806,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. - if (N->use_empty()) { - // Nodes can be reintroduced into the worklist. Make sure we do not - // process a node that has been replaced. - removeFromWorklist(N); - - // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); - } + if (N->use_empty()) + deleteAndRecombine(N); return SDValue(N, 0); } @@ -765,22 +825,12 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to // something else needing this node. - if (TLO.Old.getNode()->use_empty()) { - removeFromWorklist(TLO.Old.getNode()); - - // If the operands of this node are only used by the node, they will now - // be dead. Make sure to visit them first to delete dead nodes early. 
- for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) - if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) - AddToWorklist(TLO.Old.getNode()->getOperand(i).getNode()); - - DAG.DeleteNode(TLO.Old.getNode()); - } + if (TLO.Old.getNode()->use_empty()) + deleteAndRecombine(TLO.Old.getNode()); } -/// SimplifyDemandedBits - Check the specified integer node value to see if -/// it can be simplified or if things it uses can be simplified by bit -/// propagation. If so, return true. +/// Check the specified integer node value to see if it can be simplified or if +/// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; @@ -815,8 +865,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); - removeFromWorklist(Load); - DAG.DeleteNode(Load); + deleteAndRecombine(Load); AddToWorklist(Trunc.getNode()); } @@ -826,8 +875,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, @@ -889,9 +938,9 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { return DAG.getZeroExtendInReg(NewOp, dl, OldVT); } -/// PromoteIntBinOp - Promote the specified integer binary operation if the -/// target indicates it is beneficial. e.g. 
On x86, it's usually better to -/// promote i16 operations to i32 since i16 instructions are longer. +/// Promote the specified integer binary operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (!LegalOperations) return SDValue(); @@ -947,9 +996,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { return SDValue(); } -/// PromoteIntShiftOp - Promote the specified integer shift operation if the -/// target indicates it is beneficial. e.g. On x86, it's usually better to -/// promote i16 operations to i32 since i16 instructions are longer. +/// Promote the specified integer shift operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { if (!LegalOperations) return SDValue(); @@ -1048,8 +1097,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast<LoadSDNode>(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD - : ISD::EXTLOAD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), @@ -1064,14 +1113,42 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); - removeFromWorklist(N); - DAG.DeleteNode(N); + deleteAndRecombine(N); AddToWorklist(Result.getNode()); return true; } return false; } +/// \brief Recursively delete a node which has no uses and any operands for +/// which it is the only use. 
+/// +/// Note that this both deletes the nodes and removes them from the worklist. +/// It also adds any nodes who have had a user deleted to the worklist as they +/// may now have only one use and subject to other combines. +bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { + if (!N->use_empty()) + return false; + + SmallSetVector<SDNode *, 16> Nodes; + Nodes.insert(N); + do { + N = Nodes.pop_back_val(); + if (!N) + continue; + + if (N->use_empty()) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Nodes.insert(N->getOperand(i).getNode()); + + removeFromWorklist(N); + DAG.DeleteNode(N); + } else { + AddToWorklist(N); + } + } while (!Nodes.empty()); + return true; +} //===----------------------------------------------------------------------===// // Main DAG Combiner implementation @@ -1083,6 +1160,13 @@ void DAGCombiner::Run(CombineLevel AtLevel) { LegalOperations = Level >= AfterLegalizeVectorOps; LegalTypes = Level >= AfterLegalizeTypes; + // Early exit if this basic block is in an optnone function. + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + if (FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeNone)) + return; + // Add all the dag nodes to the worklist. for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) @@ -1093,34 +1177,52 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // changes of the root. HandleSDNode Dummy(DAG.getRoot()); - // The root of the dag may dangle to deleted nodes until the dag combiner is - // done. Set it to null to avoid confusion. - DAG.setRoot(SDValue()); - // while the worklist isn't empty, find a node and // try and combine it. - while (!WorklistContents.empty()) { + while (!WorklistMap.empty()) { SDNode *N; - // The WorklistOrder holds the SDNodes in order, but it may contain - // duplicates. 
- // In order to avoid a linear scan, we use a set (O(log N)) to hold what the - // worklist *should* contain, and check the node we want to visit is should - // actually be visited. + // The Worklist holds the SDNodes in order, but it may contain null entries. do { - N = WorklistOrder.pop_back_val(); - } while (!WorklistContents.erase(N)); + N = Worklist.pop_back_val(); + } while (!N); + + bool GoodWorklistEntry = WorklistMap.erase(N); + (void)GoodWorklistEntry; + assert(GoodWorklistEntry && + "Found a worklist entry without a corresponding map entry!"); // If N has no uses, it is dead. Make sure to revisit all N's operands once // N is deleted from the DAG, since they too may now be dead or may have a // reduced number of uses, allowing other xforms. - if (N->use_empty()) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorklist(N->getOperand(i).getNode()); - - DAG.DeleteNode(N); + if (recursivelyDeleteUnusedNodes(N)) continue; + + WorklistRemover DeadNodes(*this); + + // If this combine is running after legalizing the DAG, re-legalize any + // nodes pulled off the worklist. + if (Level == AfterLegalizeDAG) { + SmallSetVector<SDNode *, 16> UpdatedNodes; + bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); + + for (SDNode *LN : UpdatedNodes) { + AddToWorklist(LN); + AddUsersToWorklist(LN); + } + if (!NIsValid) + continue; } + DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); + + // Add any operands of the new node which have not yet been combined to the + // worklist as well. Because the worklist uniques things already, this + // won't repeatedly process the same operand. 
+ CombinedNodes.insert(N); + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (!CombinedNodes.count(N->getOperand(i).getNode())) + AddToWorklist(N->getOperand(i).getNode()); + SDValue RV = combine(N); if (!RV.getNode()) @@ -1139,15 +1241,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { RV.getNode()->getOpcode() != ISD::DELETED_NODE && "Node was deleted but visit returned new node!"); - DEBUG(dbgs() << "\nReplacing.3 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - RV.getNode()->dump(&DAG); - dbgs() << '\n'); + DEBUG(dbgs() << " ... into: "; + RV.getNode()->dump(&DAG)); // Transfer debug value. DAG.TransferDbgValues(SDValue(N, 0), RV); - WorklistRemover DeadNodes(*this); if (N->getNumValues() == RV.getNode()->getNumValues()) DAG.ReplaceAllUsesWith(N, RV.getNode()); else { @@ -1161,23 +1259,11 @@ void DAGCombiner::Run(CombineLevel AtLevel) { AddToWorklist(RV.getNode()); AddUsersToWorklist(RV.getNode()); - // Add any uses of the old node to the worklist in case this node is the - // last one that uses them. They may become dead after this node is - // deleted. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - AddToWorklist(N->getOperand(i).getNode()); - // Finally, if the node is now dead, remove it from the graph. The node // may not be dead if the replacement process recursively simplified to - // something else needing this node. - if (N->use_empty()) { - // Nodes can be reintroduced into the worklist. Make sure we do not - // process a node that has been replaced. - removeFromWorklist(N); - - // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); - } + // something else needing this node. This will also take care of adding any + // operands which have lost a user to the worklist. + recursivelyDeleteUnusedNodes(N); } // If the root changed (e.g. it was a dead load, update the root). 
@@ -1239,6 +1325,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FMA: return visitFMA(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); + case ISD::FSQRT: return visitFSQRT(N); case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); @@ -1250,6 +1337,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FNEG: return visitFNEG(N); case ISD::FABS: return visitFABS(N); case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FMINNUM: return visitFMINNUM(N); + case ISD::FMAXNUM: return visitFMAXNUM(N); case ISD::FCEIL: return visitFCEIL(N); case ISD::FTRUNC: return visitFTRUNC(N); case ISD::BRCOND: return visitBRCOND(N); @@ -1263,6 +1352,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSTORE: return visitMSTORE(N); } return SDValue(); } @@ -1342,8 +1433,8 @@ SDValue DAGCombiner::combine(SDNode *N) { return RV; } -/// getInputChainForNode - Given a node, return its input chain if it has one, -/// otherwise return a null sd operand. +/// Given a node, return its input chain if it has one, otherwise return a null +/// sd operand. static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) @@ -1405,7 +1496,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { default: // Only add if it isn't already in the list. 
- if (SeenOps.insert(Op.getNode())) + if (SeenOps.insert(Op.getNode()).second) Ops.push_back(Op); else Changed = true; @@ -1446,33 +1537,10 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); } while (!N->use_empty()); - removeFromWorklist(N); - DAG.DeleteNode(N); + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } -static -SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, - SelectionDAG &DAG) { - EVT VT = N0.getValueType(); - SDValue N00 = N0.getOperand(0); - SDValue N01 = N0.getOperand(1); - ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); - - if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && - isa<ConstantSDNode>(N00.getOperand(1))) { - // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) - N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT, - DAG.getNode(ISD::SHL, SDLoc(N00), VT, - N00.getOperand(0), N01), - DAG.getNode(ISD::SHL, SDLoc(N01), VT, - N00.getOperand(1), N01)); - return DAG.getNode(ISD::ADD, DL, VT, N0, N1); - } - - return SDValue(); -} - SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1589,16 +1657,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } } - // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) - if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG); - if (Result.getNode()) return Result; - } - if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG); - if (Result.getNode()) return Result; - } - // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB) @@ -1642,6 +1700,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return 
DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } + // add X, (sextinreg Y i1) -> sub X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, VT)); + return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); + } + } + return SDValue(); } @@ -1807,6 +1876,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { VT); } + // sub X, (sextinreg Y i1) -> add X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, VT)); + return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); + } + } + return SDValue(); } @@ -2011,9 +2091,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. - if (TLI.isPow2DivCheap()) + if (TLI.isPow2SDivCheap()) return SDValue(); + // Target-specific implementation of sdiv x, pow2. + SDValue Res = BuildSDIVPow2(N); + if (Res.getNode()) + return Res; + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register @@ -2281,10 +2366,9 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { return SDValue(); } -/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that -/// compute two values. LoOp and HiOp give the opcodes for the two computations -/// that are being performed. Return true if a simplification was made. -/// +/// Perform optimizations common to nodes that compute two values. LoOp and HiOp +/// give the opcodes for the two computations that are being performed. Return +/// true if a simplification was made. 
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp) { // If the high half is not needed, just compute the low half. @@ -2292,8 +2376,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!HiExists && (!LegalOperations || TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { - SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - ArrayRef<SDUse>(N->op_begin(), N->op_end())); + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); return CombineTo(N, Res, Res); } @@ -2302,8 +2385,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!LoExists && (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { - SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - ArrayRef<SDUse>(N->op_begin(), N->op_end())); + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); return CombineTo(N, Res, Res); } @@ -2313,8 +2395,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. 
if (LoExists) { - SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), - ArrayRef<SDUse>(N->op_begin(), N->op_end())); + SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); AddToWorklist(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && @@ -2324,8 +2405,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } if (HiExists) { - SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), - ArrayRef<SDUse>(N->op_begin(), N->op_end())); + SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); AddToWorklist(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); if (HiOpt.getNode() && HiOpt != Hi && @@ -2431,8 +2511,8 @@ SDValue DAGCombiner::visitUDIVREM(SDNode *N) { return SDValue(); } -/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with -/// two operands of the same opcode, try to simplify it. +/// If this is a binary operator with two operands of the same opcode, try to +/// simplify it. SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); EVT VT = N0.getValueType(); @@ -2445,6 +2525,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine @@ -2452,6 +2533,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || + N0.getOpcode() == ISD::BSWAP || // Avoid infinite looping with PromoteIntBinOp. 
(N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || @@ -2598,9 +2680,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and x, 0) -> 0, vector edition if (ISD::isBuildVectorAllZeros(N0.getNode())) - return N0; + // do not return N0, because undef node may exist in N0 + return DAG.getConstant( + APInt::getNullValue( + N0.getValueType().getScalarType().getSizeInBits()), + N0.getValueType()); if (ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; + // do not return N1, because undef node may exist in N1 + return DAG.getConstant( + APInt::getNullValue( + N1.getValueType().getScalarType().getSizeInBits()), + N1.getValueType()); // fold (and x, -1) -> x, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) @@ -2708,6 +2798,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // actually legal and isn't going to get expanded, else this is a false // optimisation. bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getValueType(0), Load->getMemoryVT()); // Resize the constant to the same size as the original memory access before @@ -2834,7 +2925,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -2854,7 +2945,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), 
MemVT, LN0->getMemOperand()); @@ -2880,10 +2971,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); EVT LoadedVT = LN0->getMemoryVT(); + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; if (ExtVT == LoadedVT && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, @@ -2898,7 +2990,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, + ExtVT))) { EVT PtrType = LN0->getOperand(1).getValueType(); unsigned Alignment = LN0->getAlignment(); @@ -2918,13 +3011,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { AddToWorklist(NewPtr.getNode()); - EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - Alignment, LN0->getTBAAInfo()); + LN0->isInvariant(), Alignment, LN0->getAAInfo()); AddToWorklist(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2971,8 +3063,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(); } -/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16 -/// +/// Match (a >> 8) | (a << 8) as (bswap a) >> 16. 
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits) { if (!LegalOperations) @@ -3077,10 +3168,13 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, return Res; } -/// isBSwapHWordElement - Return true if the specified node is an element -/// that makes up a 32-bit packed halfword byteswap. i.e. -/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) -static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) { +/// Return true if the specified node is an element that makes up a 32-bit +/// packed halfword byteswap. +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) +static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { if (!N.getNode()->hasOneUse()) return false; @@ -3147,8 +3241,11 @@ static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) { return true; } -/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is -/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) +/// Match a 32-bit packed halfword bswap. 
That is +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) /// => (rotl (bswap x), 16) SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!LegalOperations) @@ -3160,7 +3257,6 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegal(ISD::BSWAP, VT)) return SDValue(); - SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr); // Look for either // (or (or (and), (and)), (or (and), (and))) // (or (or (or (and), (and)), (and)), (and)) @@ -3168,6 +3264,7 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { return SDValue(); SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); + SDNode *Parts[4] = {}; if (N1.getOpcode() == ISD::OR && N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { @@ -3241,9 +3338,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or x, -1) -> -1, vector edition if (ISD::isBuildVectorAllOnes(N0.getNode())) - return N0; + // do not return N0, because undef node may exist in N0 + return DAG.getConstant( + APInt::getAllOnesValue( + N0.getValueType().getScalarType().getSizeInBits()), + N0.getValueType()); if (ISD::isBuildVectorAllOnes(N1.getNode())) - return N1; + // do not return N1, because undef node may exist in N1 + return DAG.getConstant( + APInt::getAllOnesValue( + N1.getValueType().getScalarType().getSizeInBits()), + N1.getValueType()); // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) @@ -3342,12 +3447,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa<ConstantSDNode>(N0.getOperand(1))) { ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { - SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1); - if (!COR.getNode()) - return SDValue(); - return DAG.getNode(ISD::AND, SDLoc(N), VT, - 
DAG.getNode(ISD::OR, SDLoc(N0), VT, - N0.getOperand(0), N1), COR); + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) + return DAG.getNode( + ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); + return SDValue(); } } // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) @@ -3435,7 +3539,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return SDValue(); } -/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. +/// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { if (isa<ConstantSDNode>(Op.getOperand(1))) { @@ -3732,7 +3836,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return RXOR; // fold !(x cc y) -> (x !cc y) - if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { + if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { bool isInt = LHS.getValueType().isInteger(); ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), isInt); @@ -3825,8 +3929,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return SDValue(); } -/// visitShiftByConstant - Handle transforms common to the three shifts, when -/// the shift amount is a constant. +/// Handle transforms common to the three shifts, when the shift amount is a +/// constant. SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { // We can't and shouldn't fold opaque constants. 
if (Amt->isOpaque()) @@ -3963,8 +4067,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); - if (C.getNode()) + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } else { @@ -4098,6 +4201,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { HiBitsMask); } + // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // Variant of version done on multiply, except mul by a power of 2 is turned + // into a shift. + APInt Val; + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isa<ConstantSDNode>(N0.getOperand(1)) || + isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); + SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); + } + if (N1C) { SDValue NewSHL = visitShiftByConstant(N, N1C); if (NewSHL.getNode()) @@ -4498,6 +4613,43 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } + +/// \brief Generate Min/Max node +static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, + SDValue True, SDValue False, + ISD::CondCode CC, const TargetLowering &TLI, + SelectionDAG &DAG) { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + switch (CC) { + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETULT: + case ISD::SETULE: { + unsigned Opcode = (LHS == True) ? 
ISD::FMINNUM : ISD::FMAXNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: { + unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + default: + return SDValue(); + } +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4577,9 +4729,31 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { + // select x, y (fcmp lt x, y) -> fminnum x, y + // select x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. + // + + // FIXME: Instead of testing for UnsafeFPMath, this should be checking for + // no signed zeros as well as no nans. 
+ const TargetOptions &Options = DAG.getTarget().Options; + if (Options.UnsafeFPMath && + VT.isFloatingPoint() && N0.hasOneUse() && + DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + + SDValue FMinMax = + combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), + N1, N2, CC, TLI, DAG); + if (FMinMax) + return FMinMax; + } + if ((!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || - TLI.isOperationLegal(ISD::SELECT_CC, VT)) + TLI.isOperationLegal(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -4613,12 +4787,17 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { SDValue Cond = N->getOperand(0); SDValue LHS = N->getOperand(1); SDValue RHS = N->getOperand(2); - MVT VT = N->getSimpleValueType(0); + EVT VT = N->getValueType(0); int NumElems = VT.getVectorNumElements(); assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getOpcode() == ISD::CONCAT_VECTORS && Cond.getOpcode() == ISD::BUILD_VECTOR); + // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about + // binary ones here. + if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) + return SDValue(); + // We're sure we have an even number of elements due to the // concat_vectors we have as arguments to vselect. // Skip BV elements until we find one that's not an UNDEF @@ -4656,6 +4835,162 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { TopHalf->isNullValue() ? 
RHS->getOperand(1) : LHS->getOperand(1)); } +SDValue DAGCombiner::visitMSTORE(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); + SDValue Mask = MST->getMask(); + SDValue Data = MST->getData(); + SDLoc DL(N); + + // If the MSTORE data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() == ISD::SETCC) { + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); + + SDValue Chain = MST->getChain(); + SDValue Ptr = MST->getBasePtr(); + + EVT MemoryVT = MST->getMemoryVT(); + unsigned Alignment = MST->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). 
+ getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MST->getAAInfo(), MST->getRanges()); + + Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, MST->getAAInfo(), + MST->getRanges()); + + Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } + return SDValue(); +} + +SDValue DAGCombiner::visitMLOAD(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); + SDValue Mask = MLD->getMask(); + SDLoc DL(N); + + // If the MLOAD result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. 
+ if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Chain = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + EVT MemoryVT = MLD->getMemoryVT(); + unsigned Alignment = MLD->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); + + SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { LoadRes, Chain }; + return DAG.getMergeValues(RetOps, DL); + } + return SDValue(); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4765,13 +5100,16 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { return N2; // cond always true -> true val else return N3; // cond always false -> false val - } - - // Fold to a simpler select_cc - if (SCC.getOpcode() == ISD::SETCC) + } else if (SCC->getOpcode() == ISD::UNDEF) { + // When the condition is UNDEF, just return the first operand. This is + // coherent the DAG creation, no setcc node is created in this case + return N2; + } else if (SCC.getOpcode() == ISD::SETCC) { + // Fold to a simpler select_cc return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0), SCC.getOperand(1), N2, N3, SCC.getOperand(2)); + } } // If we can fold this based on the true/false value, do so. 
@@ -5004,7 +5342,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5032,7 +5370,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, @@ -5052,7 +5390,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { @@ -5131,14 +5469,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) { SDLoc DL(N); ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); - SDValue SetCC = DAG.getSetCC(DL, - SetCCVT, + SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N0.getOperand(0), N0.getOperand(1), CC); - EVT SelectVT = getSetCCResultType(VT); - return DAG.getSelect(DL, VT, - DAG.getSExtOrTrunc(SetCC, DL, SelectVT), + return DAG.getSelect(DL, VT, SetCC, NegOne, DAG.getConstant(0, VT)); - } } } @@ -5298,7 +5632,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 
ISD::isUNINDEXEDLoad(N0.getNode()) && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5326,7 +5660,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { @@ -5363,7 +5697,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, @@ -5525,7 +5859,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // scalars. 
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ISD::isUNINDEXEDLoad(N0.getNode()) && - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) @@ -5555,7 +5889,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); - if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) { + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); @@ -5610,9 +5944,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { return SDValue(); } -/// GetDemandedBits - See if the specified operand can be simplified with the -/// knowledge that only the bits specified by Mask are used. If so, return the -/// simpler operand, otherwise return a null SDValue. +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by Mask are used. If so, return the simpler operand, +/// otherwise return a null SDValue. SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; @@ -5653,11 +5987,11 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { return SDValue(); } -/// ReduceLoadWidth - If the result of a wider load is shifted to right of N -/// bits and then truncated to a narrower type and where N is a multiple -/// of number of bits of the narrower type, transform it to a narrower load -/// from address + N / num of bits of new type. If the result is to be -/// extended, also fold the extension to form a extending load. 
+/// If the result of a wider load is shifted to right of N bits and then +/// truncated to a narrower type and where N is a multiple of number of bits of +/// the narrower type, transform it to a narrower load from address + N / num of +/// bits of new type. If the result is to be extended, also fold the extension +/// to form a extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); @@ -5684,7 +6018,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() - N01->getZExtValue()); } - if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) + if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); @@ -5763,6 +6097,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) return SDValue(); + if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) + return SDValue(); + EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5789,12 +6126,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), NewAlign, LN0->getTBAAInfo()); + LN0->isInvariant(), NewAlign, LN0->getAAInfo()); else Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - NewAlign, LN0->getTBAAInfo()); + LN0->isInvariant(), NewAlign, LN0->getAAInfo()); // Replace the old load's chain with the new load's chain. 
WorklistRemover DeadNodes(*this); @@ -5888,7 +6225,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -5904,7 +6241,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { N0.hasOneUse() && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -6152,7 +6489,7 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { return Elt.getOperand(Elt.getResNo()).getNode(); } -/// CombineConsecutiveLoads - build_pair (load, load) -> load +/// build_pair (load, load) -> load /// if load locations are consecutive. SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); @@ -6218,7 +6555,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // Ideally this won't happen very often, because instcombine // and the earlier dagcombine runs (where illegal nodes are // permitted) should have folded most of them already. 
- DAG.DeleteNode(Res.getNode()); + deleteAndRecombine(Res.getNode()); } } @@ -6247,12 +6584,8 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), OrigAlign, - LN0->getTBAAInfo()); - AddToWorklist(N); - CombineTo(N0.getNode(), - DAG.getNode(ISD::BITCAST, SDLoc(N0), - N0.getValueType(), Load), - Load.getValue(1)); + LN0->getAAInfo()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); return Load; } } @@ -6337,9 +6670,8 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { return CombineConsecutiveLoads(N, VT); } -/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector -/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the -/// destination element value type. +/// We know that BV is a build_vector node with Constant, ConstantFP or Undef +/// operands. DstEltVT indicates the destination element value type. SDValue DAGCombiner:: ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); @@ -6383,7 +6715,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { if (SrcEltVT.isFloatingPoint()) { // Convert the input float vector to a int vector where the elements are the // same sizes. - assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); SrcEltVT = IntVT; @@ -6392,7 +6723,6 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Now we know the input is an integer vector. If the output is a FP type, // convert to integer first, then to FP of the right size. 
if (DstEltVT.isFloatingPoint()) { - assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); @@ -6475,6 +6805,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6485,195 +6816,197 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); + // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); - // fold (fadd A, 0) -> A - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N1CFP->getValueAPF().isZero()) - return N0; + // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); + // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) + isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); - // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), - 
DAG.getNode(ISD::FADD, SDLoc(N), VT, - N0.getOperand(1), N1)); - - // No FP constant should be created after legalization as Instruction - // Selection pass has hard time in dealing with FP constant. - // - // We don't need test this condition for transformation like following, as - // the DAG being transformed implies it is legal to take FP constant as - // operand. - // - // (fadd (fmul c, x), x) -> (fmul c+1, x) - // - bool AllowNewFpConst = (Level < AfterLegalizeDAG); - - // If allow, fold (fadd (fneg x), x) -> 0.0 - if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) - return DAG.getConstantFP(0.0, VT); + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // No FP constant should be created after legalization as Instruction + // Selection pass has a hard time dealing with FP constants. + bool AllowNewConst = (Level < AfterLegalizeDAG); - // If allow, fold (fadd x, (fneg x)) -> 0.0 - if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) - return DAG.getConstantFP(0.0, VT); - - // In unsafe math mode, we can fold chains of FADD's of the same value - // into multiplications. This transform is not safe in general because - // we are reducing the number of rounding steps. 
- if (DAG.getTarget().Options.UnsafeFPMath && - TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && - !N0CFP && !N1CFP) { - if (N0.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); - - // (fadd (fmul c, x), x) -> (fmul x, c+1) - if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP00, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, NewCFP); - } + // fold (fadd A, 0) -> A + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N0; - // (fadd (fmul x, c), x) -> (fmul x, c+1) - if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, NewCFP); - } + // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) + if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(1))) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, SDLoc(N), VT, + N0.getOperand(1), N1)); + + // If allowed, fold (fadd (fneg x), x) -> 0.0 + if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) + return DAG.getConstantFP(0.0, VT); + + // If allowed, fold (fadd x, (fneg x)) -> 0.0 + if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) + return DAG.getConstantFP(0.0, VT); + + // We can fold chains of FADD's of the same value into multiplications. + // This transform is not safe in general because we are reducing the number + // of rounding steps. 
+ if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + + // (fadd (fmul x, c), x) -> (fmul x, c+1) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); + } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) - if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP00, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(1), NewCFP); + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP01, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N0.getOperand(0), NewCFP); + } } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) - if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP01, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), NewCFP); - } - } + if (N1.getOpcode() == ISD::FMUL) { + ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); - if (N1.getOpcode() == ISD::FMUL) { - ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - ConstantFPSDNode *CFP11 = 
dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); + // (fadd x, (fmul x, c)) -> (fmul x, c+1) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(1.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); + } - // (fadd x, (fmul c, x)) -> (fmul x, c+1) - if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP10, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, NewCFP); + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, + SDValue(CFP11, 0), + DAG.getConstantFP(2.0, VT)); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1.getOperand(0), NewCFP); + } } - // (fadd x, (fmul x, c)) -> (fmul x, c+1) - if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, NewCFP); + if (N0.getOpcode() == ISD::FADD && AllowNewConst) { + ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul x, 3.0) + if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1, DAG.getConstantFP(3.0, VT)); } - - // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) - if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD && - N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(1) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP10, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1.getOperand(1), NewCFP); + if (N1.getOpcode() == ISD::FADD && AllowNewConst) { + 
ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N0, DAG.getConstantFP(3.0, VT)); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) - if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) + if (AllowNewConst && + N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(0) == N0.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, - SDValue(CFP11, 0), - DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1.getOperand(0), NewCFP); - } - } - - if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { - ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul x, 3.0) - if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N1, DAG.getConstantFP(3.0, VT)); - } - - if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { - ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul x, 3.0) - if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0, DAG.getConstantFP(3.0, VT)); + N0.getOperand(0), DAG.getConstantFP(4.0, VT)); } - - // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) - if (AllowNewFpConst && - N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && - N0.getOperand(0) == N0.getOperand(1) && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(0), - DAG.getConstantFP(4.0, VT)); - } + } // 
enable-unsafe-fp-math // FADD -> FMA combines: - if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || - DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); + + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. + if (TLI.isFPExtFree(VT)) { + + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z)), z) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. 
+ if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(1)), N0); + } + } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } } return SDValue(); @@ -6682,10 +7015,11 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); SDLoc dl(N); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6696,60 +7030,60 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); - // fold (fsub A, 0) -> A - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N1CFP->getValueAPF().isZero()) - return N0; - // fold (fsub 0, B) -> -B 
- if (DAG.getTarget().Options.UnsafeFPMath && - N0CFP && N0CFP->getValueAPF().isZero()) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) - return GetNegatedExpression(N1, DAG, LegalOperations); - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, dl, VT, N1); - } + // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return DAG.getNode(ISD::FADD, dl, VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); - // If 'unsafe math' is enabled, fold - // (fsub x, x) -> 0.0 & - // (fsub x, (fadd x, y)) -> (fneg y) & - // (fsub x, (fadd y, x)) -> (fneg y) - if (DAG.getTarget().Options.UnsafeFPMath) { + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // (fsub A, 0) -> A + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N0; + + // (fsub 0, B) -> -B + if (N0CFP && N0CFP->getValueAPF().isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + return GetNegatedExpression(N1, DAG, LegalOperations); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, dl, VT, N1); + } + + // (fsub x, x) -> 0.0 if (N0 == N1) return DAG.getConstantFP(0.0f, VT); + // (fsub x, (fadd x, y)) -> (fneg y) + // (fsub x, (fadd y, x)) -> (fneg y) if (N1.getOpcode() == ISD::FADD) { SDValue N10 = N1->getOperand(0); SDValue N11 = N1->getOperand(1); - if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, - &DAG.getTarget().Options)) + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } // FSUB -> FMA 
combines: - if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || - DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + TLI.isFMAFasterThanFMulAndFAdd(VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + if (N0.getOpcode() == ISD::FMUL && + (N0->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + if (N1.getOpcode() == ISD::FMUL && + (N1->hasOneUse() || TLI.enableAggressiveFMAFusion(VT))) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), @@ -6758,13 +7092,115 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && - N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { + ((N0->hasOneUse() && N0.getOperand(0).hasOneUse()) || + TLI.enableAggressiveFMAFusion(VT))) { SDValue N00 = N0.getOperand(0).getOperand(0); SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N00), N01, DAG.getNode(ISD::FNEG, dl, VT, N1)); } + + // When FP_EXTEND nodes are free on the target, and there is an opportunity + // to combine into FMA, arrange such nodes accordingly. 
+ if (TLI.isFPExtFree(VT)) { + + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N000.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N000.getOperand(1)), + DAG.getNode(ISD::FNEG, dl, VT, N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fma (fneg (fpext x)), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FMA, dl, VT, + DAG.getNode(ISD::FNEG, dl, VT, + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), + VT, N000.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, + N000.getOperand(1)), + DAG.getNode(ISD::FNEG, dl, VT, 
N1)); + } + } + } + } + + // More folding opportunities when target permits. + if (TLI.enableAggressiveFMAFusion(VT)) { + + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == ISD::FMA && + N0.getOperand(2).getOpcode() == ISD::FMUL) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1))); + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == ISD::FMA && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(ISD::FMA, SDLoc(N), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N20), + N21, N0)); + } + } } return SDValue(); @@ -6773,47 +7209,82 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); EVT VT = N->getValueType(0); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { + // This just handles C1 * C2 for vectors. Other vector folds are below. SDValue FoldedVOp = SimplifyVBinOp(N); - if (FoldedVOp.getNode()) return FoldedVOp; + if (FoldedVOp.getNode()) + return FoldedVOp; + // Canonicalize vector constant to RHS. 
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && + N1.getOpcode() != ISD::BUILD_VECTOR) + if (auto *BV0 = dyn_cast<BuildVectorSDNode>(N0)) + if (BV0->isConstant()) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); } // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); + // canonicalize constant to RHS if (N0CFP && !N1CFP) return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); - // fold (fmul A, 0) -> 0 - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N1CFP->getValueAPF().isZero()) - return N1; - // fold (fmul A, 0) -> 0, vector edition. - if (DAG.getTarget().Options.UnsafeFPMath && - ISD::isBuildVectorAllZeros(N1.getNode())) - return N1; + // fold (fmul A, 1.0) -> A if (N1CFP && N1CFP->isExactlyValue(1.0)) return N0; + + if (Options.UnsafeFPMath) { + // fold (fmul A, 0) -> 0 + if (N1CFP && N1CFP->getValueAPF().isZero()) + return N1; + + // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (N0.getOpcode() == ISD::FMUL) { + // Fold scalars or any vector constants (not just splats). + // This fold is done in general by InstCombine, but extra fmul insts + // may have been generated during lowering. + SDValue N01 = N0.getOperand(1); + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDLoc SL(N); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, N01, N1); + return DAG.getNode(ISD::FMUL, SL, VT, N0.getOperand(0), MulConsts); + } + } + + // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) + // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs + // during an early run of DAGCombiner can prevent folding with fmuls + // inserted during lowering. 
+ if (N0.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1)) { + SDLoc SL(N); + const SDValue Two = DAG.getConstantFP(2.0, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, SL, VT, Two, N1); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), MulConsts); + } + } + // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); + // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, - &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, - &DAG.getTarget().Options)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -6823,14 +7294,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } } - // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) - if (DAG.getTarget().Options.UnsafeFPMath && - N1CFP && N0.getOpcode() == ISD::FMUL && - N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, SDLoc(N), VT, - N0.getOperand(1), N1)); - return SDValue(); } @@ -6842,8 +7305,16 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); SDLoc dl(N); + const TargetOptions &Options = DAG.getTarget().Options; - if (DAG.getTarget().Options.UnsafeFPMath) { + // Constant fold FMA. 
+ if (isa<ConstantFPSDNode>(N0) && + isa<ConstantFPSDNode>(N1) && + isa<ConstantFPSDNode>(N2)) { + return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); + } + + if (Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; if (N1CFP && N1CFP->isZero()) @@ -6859,7 +7330,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && N2.getOperand(1).getOpcode() == ISD::ConstantFP) { @@ -6869,7 +7340,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) - if (DAG.getTarget().Options.UnsafeFPMath && + if (Options.UnsafeFPMath && N0.getOpcode() == ISD::FMUL && N1CFP && N0.getOperand(1).getOpcode() == ISD::ConstantFP) { return DAG.getNode(ISD::FMA, dl, VT, @@ -6893,13 +7364,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + if (Options.UnsafeFPMath && N1CFP && N0 == N2) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); // (fma x, c, (fneg x)) -> (fmul x, (c-1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && + if (Options.UnsafeFPMath && N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, @@ -6915,7 +7386,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; // fold vector ops if (VT.isVector()) { @@ -6927,30 +7399,79 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (N0CFP 
&& N1CFP) return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); - // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. - if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { - // Compute the reciprocal 1.0 / c2. - APFloat N1APF = N1CFP->getValueAPF(); - APFloat Recip(N1APF.getSemantics(), 1); // 1.0 - APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); - // Only do the transform if the reciprocal is a legal fp immediate that - // isn't too nasty (eg NaN, denormal, ...). - if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty - (!LegalOperations || - // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM - // backend)... we should handle this gracefully after Legalize. - // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || - TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || - TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, - DAG.getConstantFP(Recip, VT)); + if (Options.UnsafeFPMath) { + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. 
+ // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, + DAG.getConstantFP(Recip, VT)); + } + + // If this FDIV is part of a reciprocal square root, it may be folded + // into a target-specific square root estimate instruction. + if (N1.getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0))) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FP_ROUND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0))) { + RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } else if (N1.getOpcode() == ISD::FMUL) { + // Look through an FMUL. Even though this won't remove the FDIV directly, + // it's still worthwhile to get rid of the FSQRT if possible. 
+ SDValue SqrtOp; + SDValue OtherOp; + if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(0); + OtherOp = N1.getOperand(1); + } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(1); + OtherOp = N1.getOperand(0); + } + if (SqrtOp.getNode()) { + // We found a FSQRT, so try to make this fold: + // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0))) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } + } + } + + // Fold into a reciprocal estimate and multiply instead of a real divide. + if (SDValue RV = BuildReciprocalEstimate(N1)) { + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV); + } } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, - &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, - &DAG.getTarget().Options)) { + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. if (LHSNeg == 2 || RHSNeg == 2) @@ -6960,6 +7481,44 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } } + // Combine multiple FDIVs with the same divisor into multiple FMULs by the + // reciprocal. + // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) + // Notice that this is not always beneficial. One reason is different target + // may have different costs for FDIV and FMUL, so sometimes the cost of two + // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason + // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". + if (Options.UnsafeFPMath) { + // Skip if current node is a reciprocal. 
+ if (N0CFP && N0CFP->isExactlyValue(1.0)) + return SDValue(); + + SmallVector<SDNode *, 4> Users; + // Find all FDIV users of the same divisor. + for (SDNode::use_iterator UI = N1.getNode()->use_begin(), + UE = N1.getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = UI.getUse().getUser(); + if (User->getOpcode() == ISD::FDIV && User->getOperand(1) == N1) + Users.push_back(User); + } + + if (TLI.combineRepeatedFPDivisors(Users.size())) { + SDValue FPOne = DAG.getConstantFP(1.0, VT); // floating point 1.0 + SDValue Reciprocal = DAG.getNode(ISD::FDIV, SDLoc(N), VT, FPOne, N1); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto I = Users.begin(), E = Users.end(); I != E; ++I) { + if ((*I)->getOperand(0) != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(*I), VT, + (*I)->getOperand(0), Reciprocal); + DAG.ReplaceAllUsesWith(*I, NewNode.getNode()); + } + } + return SDValue(); + } + } + return SDValue(); } @@ -6977,6 +7536,32 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFSQRT(SDNode *N) { + if (DAG.getTarget().Options.UnsafeFPMath && + !TLI.isFsqrtCheap()) { + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) + if (SDValue RV = BuildRsqrtEstimate(N->getOperand(0))) { + EVT VT = RV.getValueType(); + RV = DAG.getNode(ISD::FMUL, SDLoc(N), VT, N->getOperand(0), RV); + AddToWorklist(RV.getNode()); + + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, VT); + SDValue ZeroCmp = + DAG.getSetCC(SDLoc(N), TLI.getSetCCResultType(*DAG.getContext(), VT), + N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + RV = DAG.getNode(VT.isVector() ? 
ISD::VSELECT : ISD::SELECT, + SDLoc(N), VT, ZeroCmp, Zero, RV); + return RV; + } + } + return SDValue(); +} + SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -7222,7 +7807,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && - TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType())) { + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), @@ -7239,6 +7824,43 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (N0CFP) + return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (N0CFP) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (N0CFP) + return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); + + return SDValue(); +} + +// FIXME: FNEG and FABS have a lot in common; refactor. SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7248,26 +7870,36 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (FoldedVOp.getNode()) return FoldedVOp; } + // Constant fold FNEG. 
+ if (isa<ConstantFPSDNode>(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N->getOperand(0)); + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options)) return GetNegatedExpression(N0, DAG, LegalOperations); - // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading + // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. - // TODO: We can also optimize for vectors here, but we need to make sure - // that the sign mask is created properly for each vector element. - if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && - !VT.isVector() && - N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger()) { + if (!TLI.isFNegFree(VT) && + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x80... per scalar element + // and splat it. + SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x80... 
+ SignMask = APInt::getSignBit(IntVT.getSizeInBits()); + } Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, - DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + DAG.getConstant(SignMask, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - VT, Int); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } } @@ -7289,45 +7921,50 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFCEIL(SDNode *N) { +SDValue DAGCombiner::visitFMINNUM(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - EVT VT = N->getValueType(0); - - // fold (fceil c1) -> fceil(c1) - if (N0CFP) - return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); - - return SDValue(); -} + SDValue N1 = N->getOperand(1); + const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); -SDValue DAGCombiner::visitFTRUNC(SDNode *N) { - SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - EVT VT = N->getValueType(0); + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(minnum(C0, C1), N->getValueType(0)); + } - // fold (ftrunc c1) -> ftrunc(c1) - if (N0CFP) - return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + if (N0CFP) { + EVT VT = N->getValueType(0); + // Canonicalize to constant on RHS. 
+ return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); + } return SDValue(); } -SDValue DAGCombiner::visitFFLOOR(SDNode *N) { +SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - EVT VT = N->getValueType(0); + SDValue N1 = N->getOperand(1); + const ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + const ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); - // fold (ffloor c1) -> ffloor(c1) - if (N0CFP) - return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(maxnum(C0, C1), N->getValueType(0)); + } + + if (N0CFP) { + EVT VT = N->getValueType(0); + // Canonicalize to constant on RHS. + return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); + } return SDValue(); } SDValue DAGCombiner::visitFABS(SDNode *N) { SDValue N0 = N->getOperand(0); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); if (VT.isVector()) { @@ -7336,32 +7973,40 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { } // fold (fabs c1) -> fabs(c1) - if (N0CFP) + if (isa<ConstantFPSDNode>(N0)) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); + // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); - // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading + // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading // constant pool values. - // TODO: We can also optimize for vectors here, but we need to make sure - // that the sign mask is created properly for each vector element. 
if (!TLI.isFAbsFree(VT) && - N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && - N0.getOperand(0).getValueType().isInteger() && - !VT.isVector()) { + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x7f... per scalar element + // and splat it. + SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x7f... + SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + } Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, - DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); + DAG.getConstant(SignMask, IntVT)); AddToWorklist(Int.getNode()); - return DAG.getNode(ISD::BITCAST, SDLoc(N), - N->getValueType(0), Int); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } } @@ -7441,15 +8086,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // will convert it back to (X & C1) >> C2. CombineTo(N, NewBRCond, false); // Truncate is dead. - if (Trunc) { - removeFromWorklist(Trunc); - DAG.DeleteNode(Trunc); - } + if (Trunc) + deleteAndRecombine(Trunc); // Replace the uses of SRL with SETCC WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorklist(N1.getNode()); - DAG.DeleteNode(N1.getNode()); + deleteAndRecombine(N1.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } @@ -7478,8 +8120,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { dbgs() << '\n'); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - removeFromWorklist(TheXor); - DAG.DeleteNode(TheXor); + deleteAndRecombine(TheXor); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); } @@ -7509,8 +8150,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // Replace the uses of XOR with SETCC WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - removeFromWorklist(N1.getNode()); - DAG.DeleteNode(N1.getNode()); + deleteAndRecombine(N1.getNode()); return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); } @@ -7547,9 +8187,8 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { return SDValue(); } -/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that -/// uses N as its base pointer and that N may be folded in the load / store -/// addressing mode. +/// Return true if 'Use' is a load or a store that uses N as its base pointer +/// and that N may be folded in the load / store addressing mode. static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -7588,12 +8227,11 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); } -/// CombineToPreIndexedLoadStore - Try turning a load / store into a -/// pre-indexed load / store when the base pointer is an add or subtract -/// and it has other uses besides the load / store. After the -/// transformation, the new indexed load / store has effectively folded -/// the add / subtract in and all of its other uses are redirected to the -/// new load / store. +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. 
+/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; @@ -7754,7 +8392,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { } // Finally, since the node is now dead, remove it from the graph. - DAG.DeleteNode(N); + deleteAndRecombine(N); if (Swapped) std::swap(BasePtr, Offset); @@ -7804,23 +8442,20 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); - removeFromWorklist(OtherUses[i]); - DAG.DeleteNode(OtherUses[i]); + deleteAndRecombine(OtherUses[i]); } // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); - removeFromWorklist(Ptr.getNode()); - DAG.DeleteNode(Ptr.getNode()); + deleteAndRecombine(Ptr.getNode()); return true; } -/// CombineToPostIndexedLoadStore - Try to combine a load / store with a -/// add / sub of the base pointer node into a post-indexed load / store. -/// The transformation folded the add / subtract into the new indexed -/// load / store effectively and all of its uses are redirected to the -/// new load / store. +/// Try to combine a load/store with a add/sub of the base pointer node into a +/// post-indexed load/store. The transformation folded the add/subtract into the +/// new indexed load/store effectively and all of its uses are redirected to the +/// new load/store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { if (Level < AfterLegalizeDAG) return false; @@ -7924,13 +8559,12 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { } // Finally, since the node is now dead, remove it from the graph. 
- DAG.DeleteNode(N); + deleteAndRecombine(N); // Replace the uses of Use with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), Result.getValue(isLoad ? 1 : 0)); - removeFromWorklist(Op); - DAG.DeleteNode(Op); + deleteAndRecombine(Op); return true; } } @@ -7939,6 +8573,30 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } +/// \brief Return the base-pointer arithmetic from an indexed \p LD. +SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + assert(AM != ISD::UNINDEXED); + SDValue BP = LD->getOperand(1); + SDValue Inc = LD->getOperand(2); + + // Some backends use TargetConstants for load offsets, but don't expect + // TargetConstants in general ADD nodes. We can convert these constants into + // regular Constants (if the constant is not opaque). + assert((Inc.getOpcode() != ISD::TargetConstant || + !cast<ConstantSDNode>(Inc)->isOpaque()) && + "Cannot split out indexing using opaque target constants"); + if (Inc.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), + ConstInc->getValueType(0)); + } + + unsigned Opc = + (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); + return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); +} + SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); @@ -7965,18 +8623,33 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); - if (N->use_empty()) { - removeFromWorklist(N); - DAG.DeleteNode(N); - } + if (N->use_empty()) + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } else { // Indexed loads. 
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { + + // If this load has an opaque TargetConstant offset, then we cannot split + // the indexing into an add/sub directly (that TargetConstant may not be + // valid for a different type of node, and we cannot convert an opaque + // target constant into a regular constant). + bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant && + cast<ConstantSDNode>(LD->getOperand(2))->isOpaque(); + + if (!N->hasAnyUseOfValue(0) && + ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + SDValue Index; + if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) { + Index = SplitIndexingFromLoad(LD); + // Try to fold the base pointer arithmetic into subsequent loads and + // stores. + AddUsersToWorklist(N); + } else + Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; @@ -7984,11 +8657,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << " and 2 other values\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), - DAG.getUNDEF(N->getValueType(1))); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); - removeFromWorklist(N); - DAG.DeleteNode(N); + deleteAndRecombine(N); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -8016,15 +8687,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align, - LD->getTBAAInfo()); + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), Align, LD->getAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? 
CombinerAA : - TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -8354,7 +9025,7 @@ struct LoadedSlice { // At this point, we know that we perform a cross-register-bank copy. // Check if it is expensive. - const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); // Assume bitcasts are cheap, unless both register classes do not // explicitly share a common sub class. if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) @@ -8618,9 +9289,9 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return true; } -/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the -/// load is having specific bytes cleared out. If so, return the byte size -/// being masked out and the shift amount. +/// Check to see if V is (and load (ptr), imm), where the load is having +/// specific bytes cleared out. If so, return the byte size being masked out +/// and the shift amount. static std::pair<unsigned, unsigned> CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { std::pair<unsigned, unsigned> Result(0, 0); @@ -8693,9 +9364,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { } -/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that -/// provides a value as specified by MaskInfo. If so, replace the specified -/// store with a narrower store of truncated IVal. +/// Check to see if IVal is something that provides a value as specified by +/// MaskInfo. If so, replace the specified store with a narrower store of +/// truncated IVal. 
static SDNode * ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue IVal, StoreSDNode *St, @@ -8750,10 +9421,10 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, } -/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is -/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some -/// of the loaded bits, try narrowing the load and store if it would end up -/// being a win for performance or code size. +/// Look for sequence of load / op / store where op is one of 'or', 'xor', and +/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try +/// narrowing the load and store if it would end up being a win for performance +/// or code size. SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); if (ST->isVolatile()) @@ -8853,7 +9524,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), NewAlign, - LD->getTBAAInfo()); + LD->getAAInfo()); SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, SDLoc(N), @@ -8874,10 +9545,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return SDValue(); } -/// TransformFPLoadStorePair - For a given floating point load / store pair, -/// if the load value isn't used by any other operations, then consider -/// transforming the pair to integer load / store operations if the target -/// deems the transformation profitable. +/// For a given floating point load / store pair, if the load value isn't used +/// by any other operations, then consider transforming the pair to integer +/// load / store operations if the target deems the transformation profitable. 
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); SDValue Chain = ST->getChain(); @@ -9051,7 +9721,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { return false; // Only look at ends of store sequences. - SDValue Chain = SDValue(St, 1); + SDValue Chain = SDValue(St, 0); if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) return false; @@ -9082,7 +9752,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StoreSDNode *Index = St; while (Index) { // If the chain has more than one use, then we can't reorder the mem ops. - if (Index != St && !SDValue(Index, 1)->hasOneUse()) + if (Index != St && !SDValue(Index, 0)->hasOneUse()) break; // Find the base pointer and offset for this memory node. @@ -9279,7 +9949,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); } else { - assert(false && "Invalid constant element type"); + llvm_unreachable("Invalid constant element type"); } } @@ -9313,8 +9983,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Since we know that St is redundant, just iterate. while (!St->use_empty()) DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); - removeFromWorklist(St); - DAG.DeleteNode(St); + deleteAndRecombine(St); } return true; @@ -9373,6 +10042,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (LoadNodes.size() < 2) return false; + // If we have load/store pair instructions and we only have two values, + // don't bother. + unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && + St->getAlignment() >= RequiredAlignment) + return false; + // Scan the memory operations on the chain and find the first non-consecutive // load memory address. These variables hold the index in the store node // array. 
@@ -9408,9 +10084,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy)) + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy)) LastLegalIntegerType = i+1; } } @@ -9488,8 +10164,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { continue; StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); - removeFromWorklist(St); - DAG.DeleteNode(St); + deleteAndRecombine(St); } return true; @@ -9515,7 +10190,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), OrigAlign, - ST->getTBAAInfo()); + ST->getAAInfo()); } // Turn 'store undef, Ptr' -> nothing. 
@@ -9569,19 +10244,19 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo, Ptr, ST->getPointerInfo(), isVolatile, isNonTemporal, - ST->getAlignment(), TBAAInfo); + ST->getAlignment(), AAInfo); Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Alignment = MinAlign(Alignment, 4U); SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, - Alignment, TBAAInfo); + Alignment, AAInfo); return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, St0, St1); } @@ -9598,7 +10273,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), ST->getMemoryVT(), ST->isVolatile(), ST->isNonTemporal(), Align, - ST->getTBAAInfo()); + ST->getAAInfo()); } } @@ -9608,8 +10283,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (NewST.getNode()) return NewST; - bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : - TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -9686,6 +10361,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // If this is a store followed by a store with the same value to the same + // location, then the store is dead/noop. 
+ if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { + if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() && + ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() && + ST1->isUnindexed() && !ST1->isVolatile()) { + // The store is dead, remove it. + return Chain; + } + } + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a // truncating store. We can do this even if this is already a truncstore. if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) @@ -9791,6 +10477,87 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + EVT ResultVT = EVE->getValueType(0); + EVT VecEltVT = InVecVT.getVectorElementType(); + unsigned Align = OriginalLoad->getAlignment(); + unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + VecEltVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + return SDValue(); + + Align = NewAlign; + + SDValue NewPtr = OriginalLoad->getBasePtr(); + SDValue Offset; + EVT PtrType = NewPtr.getValueType(); + MachinePointerInfo MPI; + if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { + int Elt = ConstEltNo->getZExtValue(); + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; + if (TLI.isBigEndian()) + PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; + Offset = DAG.getConstant(PtrOff, PtrType); + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + } else { + Offset = DAG.getNode( + ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, + DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); + if (TLI.isBigEndian()) + Offset = DAG.getNode( + ISD::SUB, SDLoc(EVE), EltNo.getValueType(), + DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + MPI = 
OriginalLoad->getPointerInfo(); + } + NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (ResultVT.bitsGT(VecEltVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, + VecEltVT) + ? ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad( + ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, + VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad( + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, + OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Chain = Load.getValue(1); + if (ResultVT.bitsLT(VecEltVT)) + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + } + WorklistRemover DeadNodes(*this); + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorklist(Load.getNode()); + AddUsersToWorklist(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. 
+ AddToWorklist(EVE); + ++OpsNarrowed; + return SDValue(EVE, 0); +} + SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); @@ -9859,6 +10626,39 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + bool BCNumEltsChanged = false; + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; + + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + } + + // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) + if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && + ISD::isNormalLoad(InVec.getNode()) && + !N->getOperand(1)->hasPredecessor(InVec.getNode())) { + SDValue Index = N->getOperand(1); + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -9869,30 +10669,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - bool NewLoad = false; - bool BCNumEltsChanged = false; - EVT ExtVT = VT.getVectorElementType(); - EVT LVT = ExtVT; - - // If the result of load has to be truncated, then it's not necessarily - // profitable. 
- if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) - return SDValue(); - - if (InVec.getOpcode() == ISD::BITCAST) { - // Don't duplicate a load with other uses. - if (!InVec.hasOneUse()) - return SDValue(); - - EVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) - return SDValue(); - if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) - BCNumEltsChanged = true; - InVec = InVec.getOperand(0); - ExtVT = BCVT.getVectorElementType(); - NewLoad = true; - } LoadSDNode *LN0 = nullptr; const ShuffleVectorSDNode *SVN = nullptr; @@ -9935,6 +10711,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; + EltNo = DAG.getConstant(Elt, EltNo.getValueType()); } } @@ -9947,72 +10724,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (Elt == -1) return DAG.getUNDEF(LVT); - unsigned Align = LN0->getAlignment(); - if (NewLoad) { - // Check the resultant load doesn't need a higher alignment than the - // original load. - unsigned NewAlign = - TLI.getDataLayout() - ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) - return SDValue(); - - Align = NewAlign; - } - - SDValue NewPtr = LN0->getBasePtr(); - unsigned PtrOff = 0; - - if (Elt) { - PtrOff = LVT.getSizeInBits() * Elt / 8; - EVT PtrType = NewPtr.getValueType(); - if (TLI.isBigEndian()) - PtrOff = VT.getSizeInBits() / 8 - PtrOff; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, - DAG.getConstant(PtrOff, PtrType)); - } - - // The replacement we need to do here is a little tricky: we need to - // replace an extractelement of a load with a load. - // Use ReplaceAllUsesOfValuesWith to do the replacement. 
- // Note that this replacement assumes that the extractvalue is the only - // use of the load; that's okay because we don't want to perform this - // transformation in other cases anyway. - SDValue Load; - SDValue Chain; - if (NVT.bitsGT(LVT)) { - // If the result type of vextract is wider than the load, then issue an - // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) - ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), - NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(), - Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - } else { - Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); - else - Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); - } - WorklistRemover DeadNodes(*this); - SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; - SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Since we're explcitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. - AddToWorklist(Load.getNode()); - AddUsersToWorklist(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(N); - return SDValue(N, 0); + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); } return SDValue(); @@ -10215,32 +10927,46 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from // at most two distinct vectors, turn this into a shuffle node. + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. 
+ if (!isTypeLegal(VT)) + return SDValue(); + // May only combine to shuffle after legalize if shuffle is legal. - if (LegalOperations && - !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) + if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) return SDValue(); SDValue VecIn1, VecIn2; + bool UsesZeroVector = false; for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue Op = N->getOperand(i); // Ignore undef inputs. - if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; + if (Op.getOpcode() == ISD::UNDEF) continue; + + // See if we can combine this build_vector into a blend with a zero vector. + if (!VecIn2.getNode() && ((Op.getOpcode() == ISD::Constant && + cast<ConstantSDNode>(Op.getNode())->isNullValue()) || + (Op.getOpcode() == ISD::ConstantFP && + cast<ConstantFPSDNode>(Op.getNode())->getValueAPF().isZero()))) { + UsesZeroVector = true; + continue; + } // If this input is something other than a EXTRACT_VECTOR_ELT with a // constant index, bail out. - if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || - !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { + if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op.getOperand(1))) { VecIn1 = VecIn2 = SDValue(nullptr, 0); break; } // We allow up to two distinct input vectors. - SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); + SDValue ExtractedFromVec = Op.getOperand(0); if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) continue; if (!VecIn1.getNode()) { VecIn1 = ExtractedFromVec; - } else if (!VecIn2.getNode()) { + } else if (!VecIn2.getNode() && !UsesZeroVector) { VecIn2 = ExtractedFromVec; } else { // Too many inputs. @@ -10251,55 +10977,93 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // If everything is good, we can make a shuffle operation. 
if (VecIn1.getNode()) { + unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); SmallVector<int, 8> Mask; for (unsigned i = 0; i != NumInScalars; ++i) { - if (N->getOperand(i).getOpcode() == ISD::UNDEF) { + unsigned Opcode = N->getOperand(i).getOpcode(); + if (Opcode == ISD::UNDEF) { Mask.push_back(-1); continue; } + // Operands can also be zero. + if (Opcode != ISD::EXTRACT_VECTOR_ELT) { + assert(UsesZeroVector && + (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && + "Unexpected node found!"); + Mask.push_back(NumInScalars+i); + continue; + } + // If extracting from the first vector, just use the index directly. SDValue Extract = N->getOperand(i); SDValue ExtVal = Extract.getOperand(1); + unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); if (Extract.getOperand(0) == VecIn1) { - unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - if (ExtIndex > VT.getVectorNumElements()) - return SDValue(); - Mask.push_back(ExtIndex); continue; } - // Otherwise, use InIdx + VecSize - unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); - Mask.push_back(Idx+NumInScalars); + // Otherwise, use InIdx + InputVecSize + Mask.push_back(InNumElements + ExtIndex); } + // Avoid introducing illegal shuffles with zero. + if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) + return SDValue(); + // We can't generate a shuffle node with mismatched input and output types. // Attempt to transform a single input vector to the correct type. if ((VT != VecIn1.getValueType())) { - // We don't support shuffeling between TWO values of different types. - if (VecIn2.getNode()) + // If the input vector type has a different base type to the output + // vector type, bail out. 
+ EVT VTElemType = VT.getVectorElementType(); + if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || + (VecIn2.getNode() && + (VecIn2.getValueType().getVectorElementType() != VTElemType))) return SDValue(); + // If the input vector is too small, widen it. // We only support widening of vectors which are half the size of the // output registers. For example XMM->YMM widening on X86 with AVX. - if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) - return SDValue(); + EVT VecInT = VecIn1.getValueType(); + if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { + // If we only have one small input, widen it by adding undef values. + if (!VecIn2.getNode()) + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, + DAG.getUNDEF(VecIn1.getValueType())); + else if (VecIn1.getValueType() == VecIn2.getValueType()) { + // If we have two small inputs of the same type, try to concat them. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); + VecIn2 = SDValue(nullptr, 0); + } else + return SDValue(); + } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { + // If the input vector is too large, try to split it. + // We don't support having two input vectors that are too large. + if (VecIn2.getNode()) + return SDValue(); - // If the input vector type has a different base type to the output - // vector type, bail out. - if (VecIn1.getValueType().getVectorElementType() != - VT.getVectorElementType()) + if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) + return SDValue(); + + // Try to replace VecIn1 with two extract_subvectors + // No need to update the masks, they should still be correct. 
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), TLI.getVectorIdxTy())); + VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, TLI.getVectorIdxTy())); + UsesZeroVector = false; + } else return SDValue(); - - // Widen the input vector by adding undef values. - VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - VecIn1, DAG.getUNDEF(VecIn1.getValueType())); } - // If VecIn2 is unused then change it to undef. - VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + if (UsesZeroVector) + VecIn2 = VT.isInteger() ? DAG.getConstant(0, VT) : + DAG.getConstantFP(0.0, VT); + else + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); // Check that we were able to transform all incoming values to the same // type. @@ -10307,10 +11071,6 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { VecIn1.getValueType() != VT) return SDValue(); - // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. - if (!isTypeLegal(VT)) - return SDValue(); - // Return the new VECTOR_SHUFFLE node. 
SDValue Ops[2]; Ops[0] = VecIn1; @@ -10501,6 +11261,92 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { return SDValue(); } +static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, + SDValue V, SelectionDAG &DAG) { + SDLoc DL(V); + EVT VT = V.getValueType(); + + switch (V.getOpcode()) { + default: + return V; + + case ISD::CONCAT_VECTORS: { + EVT OpVT = V->getOperand(0).getValueType(); + int OpSize = OpVT.getVectorNumElements(); + SmallBitVector OpUsedElements(OpSize, false); + bool FoundSimplification = false; + SmallVector<SDValue, 4> NewOps; + NewOps.reserve(V->getNumOperands()); + for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { + SDValue Op = V->getOperand(i); + bool OpUsed = false; + for (int j = 0; j < OpSize; ++j) + if (UsedElements[i * OpSize + j]) { + OpUsedElements[j] = true; + OpUsed = true; + } + NewOps.push_back( + OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) + : DAG.getUNDEF(OpVT)); + FoundSimplification |= Op == NewOps.back(); + OpUsedElements.reset(); + } + if (FoundSimplification) + V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); + return V; + } + + case ISD::INSERT_SUBVECTOR: { + SDValue BaseV = V->getOperand(0); + SDValue SubV = V->getOperand(1); + auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); + if (!IdxN) + return V; + + int SubSize = SubV.getValueType().getVectorNumElements(); + int Idx = IdxN->getZExtValue(); + bool SubVectorUsed = false; + SmallBitVector SubUsedElements(SubSize, false); + for (int i = 0; i < SubSize; ++i) + if (UsedElements[i + Idx]) { + SubVectorUsed = true; + SubUsedElements[i] = true; + UsedElements[i + Idx] = false; + } + + // Now recurse on both the base and sub vectors. + SDValue SimplifiedSubV = + SubVectorUsed + ? 
simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) + : DAG.getUNDEF(SubV.getValueType()); + SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); + if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) + V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); + return V; + } + } +} + +static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, + SDValue N1, SelectionDAG &DAG) { + EVT VT = SVN->getValueType(0); + int NumElts = VT.getVectorNumElements(); + SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); + for (int M : SVN->getMask()) + if (M >= 0 && M < NumElts) + N0UsedElements[M] = true; + else if (M >= NumElts) + N1UsedElements[M - NumElts] = true; + + SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); + SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); + if (S0 == N0 && S1 == N1) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); +} + // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat. static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); @@ -10653,6 +11499,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // There are various patterns used to build up a vector from smaller vectors, + // subvectors, or elements. Scan chains of these and replace unused insertions + // or components with undef. + if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) + return S; + if (N0.getOpcode() == ISD::CONCAT_VECTORS && Level < AfterLegalizeVectorOps && (N1.getOpcode() == ISD::UNDEF || @@ -10664,99 +11516,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return V; } - // If this shuffle node is simply a swizzle of another shuffle node, - // then try to simplify it. 
- if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N1.getOpcode() == ISD::UNDEF) { - - ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - - // The incoming shuffle must be of the same type as the result of the - // current shuffle. - assert(OtherSV->getOperand(0).getValueType() == VT && - "Shuffle types don't match"); - - SmallVector<int, 4> Mask; - // Compute the combined shuffle mask. - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - assert(Idx < (int)NumElts && "Index references undef operand"); - // Next, this index comes from the first value, which is the incoming - // shuffle. Adopt the incoming index. - if (Idx >= 0) - Idx = OtherSV->getMaskElt(Idx); - Mask.push_back(Idx); - } - - bool CommuteOperands = false; - if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { - // To be valid, the combine shuffle mask should only reference elements - // from one of the two vectors in input to the inner shufflevector. - bool IsValidMask = true; - for (unsigned i = 0; i != NumElts && IsValidMask; ++i) - // See if the combined mask only reference undefs or elements coming - // from the first shufflevector operand. - IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts; - - if (!IsValidMask) { - IsValidMask = true; - for (unsigned i = 0; i != NumElts && IsValidMask; ++i) - // Check that all the elements come from the second shuffle operand. - IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts; - CommuteOperands = IsValidMask; - } - - // Early exit if the combined shuffle mask is not valid. - if (!IsValidMask) - return SDValue(); - } - - // See if this pair of shuffles can be safely folded according to either - // of the following rules: - // shuffle(shuffle(x, y), undef) -> x - // shuffle(shuffle(x, undef), undef) -> x - // shuffle(shuffle(x, y), undef) -> y - bool IsIdentityMask = true; - unsigned BaseMaskIndex = CommuteOperands ? 
NumElts : 0; - for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) { - // Skip Undefs. - if (Mask[i] < 0) - continue; - - // The combined shuffle must map each index to itself. - IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; - } - - if (IsIdentityMask) { - if (CommuteOperands) - // optimize shuffle(shuffle(x, y), undef) -> y. - return OtherSV->getOperand(1); - - // optimize shuffle(shuffle(x, undef), undef) -> x - // optimize shuffle(shuffle(x, y), undef) -> x - return OtherSV->getOperand(0); - } - - // It may still be beneficial to combine the two shuffles if the - // resulting shuffle is legal. - if (TLI.isTypeLegal(VT) && TLI.isShuffleMaskLegal(Mask, VT)) { - if (!CommuteOperands) - // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, - &Mask[0]); - - // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) - return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), - &Mask[0]); - } - } - // Canonicalize shuffles according to rules: // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) - if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::UNDEF && + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { // The incoming shuffle must be of the same type as the result of the @@ -10775,13 +11539,12 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // Try to fold according to rules: - // shuffle(shuffle(A, B, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, B, M0), A, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) + // shuffle(shuffle(A, B, 
M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) // Don't try to fold shuffles with illegal type. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && - N1.getOpcode() != ISD::UNDEF && TLI.isTypeLegal(VT)) { + TLI.isTypeLegal(VT)) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); // The incoming shuffle must be of the same type as the result of the @@ -10789,14 +11552,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { assert(OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match"); - SDValue SV0 = OtherSV->getOperand(0); - SDValue SV1 = OtherSV->getOperand(1); - bool HasSameOp0 = N1 == SV0; - bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; - if (!HasSameOp0 && !IsSV1Undef && N1 != SV1) - // Early exit. - return SDValue(); - + SDValue SV0, SV1; SmallVector<int, 4> Mask; // Compute the combined shuffle mask for a shuffle with SV0 as the first // operand, and SV1 as the second operand. @@ -10808,24 +11564,90 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { continue; } + SDValue CurrentVec; if (Idx < (int)NumElts) { + // This shuffle index refers to the inner shuffle N0. Lookup the inner + // shuffle mask to identify which vector is actually referenced. Idx = OtherSV->getMaskElt(Idx); - if (IsSV1Undef && Idx >= (int) NumElts) - Idx = -1; // Propagate Undef. - } else - Idx = HasSameOp0 ? Idx - NumElts : Idx; + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0) + : OtherSV->getOperand(1); + } else { + // This shuffle index references an element within N1. + CurrentVec = N1; + } + + // Simple case where 'CurrentVec' is UNDEF. + if (CurrentVec.getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // Canonicalize the shuffle index. 
We don't know yet if CurrentVec + // will be the first or second operand of the combined shuffle. + Idx = Idx % NumElts; + if (!SV0.getNode() || SV0 == CurrentVec) { + // Ok. CurrentVec is the left hand side. + // Update the mask accordingly. + SV0 = CurrentVec; + Mask.push_back(Idx); + continue; + } + + // Bail out if we cannot convert the shuffle pair into a single shuffle. + if (SV1.getNode() && SV1 != CurrentVec) + return SDValue(); - Mask.push_back(Idx); + // Ok. CurrentVec is the right hand side. + // Update the mask accordingly. + SV1 = CurrentVec; + Mask.push_back(Idx + NumElts); } + // Check if all indices in Mask are Undef. In case, propagate Undef. + bool isUndefMask = true; + for (unsigned i = 0; i != NumElts && isUndefMask; ++i) + isUndefMask &= Mask[i] < 0; + + if (isUndefMask) + return DAG.getUNDEF(VT); + + if (!SV0.getNode()) + SV0 = DAG.getUNDEF(VT); + if (!SV1.getNode()) + SV1 = DAG.getUNDEF(VT); + // Avoid introducing shuffles with illegal mask. - if (TLI.isShuffleMaskLegal(Mask, VT)) { - if (IsSV1Undef) - // shuffle(shuffle(A, Undef, M0), B, M1) -> shuffle(A, B, M2) - // shuffle(shuffle(A, Undef, M0), A, M1) -> shuffle(A, Undef, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, N1, &Mask[0]); - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + if (!TLI.isShuffleMaskLegal(Mask, VT)) { + // Compute the commuted shuffle mask and test again. 
+ for (unsigned i = 0; i != NumElts; ++i) { + int idx = Mask[i]; + if (idx < 0) + continue; + else if (idx < (int)NumElts) + Mask[i] = idx + NumElts; + else + Mask[i] = idx - NumElts; + } + + if (!TLI.isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + std::swap(SV0, SV1); } + + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); } return SDValue(); @@ -10858,8 +11680,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return SDValue(); } -/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform -/// an AND to a vector_shuffle with the destination vector and a zero vector. +/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle +/// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> /// vector_shuffle V, Zero, <0, 4, 2, 4> SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { @@ -10881,7 +11703,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) Indices.push_back(i); else if (cast<ConstantSDNode>(Elt)->isNullValue()) - Indices.push_back(NumElts); + Indices.push_back(NumElts+i); else return SDValue(); } @@ -10905,7 +11727,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { return SDValue(); } -/// SimplifyVBinOp - Visit a binary vector operation, like ADD. +/// Visit a binary vector operation, like ADD. 
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { assert(N->getValueType(0).isVector() && "SimplifyVBinOp only works on vectors!"); @@ -10991,7 +11813,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return SDValue(); } -/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG. +/// Visit a binary vector operation, like FABS/FNEG. SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { assert(N->getValueType(0).isVector() && "SimplifyVUnaryOp only works on vectors!"); @@ -11042,8 +11864,8 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); AddToWorklist(SETCC.getNode()); - return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, + SCC.getOperand(2), SCC.getOperand(3)); } return SCC; @@ -11051,12 +11873,11 @@ SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, return SDValue(); } -/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS -/// are the two values being selected between, see if we can simplify the -/// select. Callers of this should assume that TheSelect is deleted if this -/// returns true. As such, they should return the appropriate thing (e.g. the -/// node) back to the top-level of the DAG combiner loop to avoid it being -/// looked at. +/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values +/// being selected between, see if we can simplify the select. Callers of this +/// should assume that TheSelect is deleted if this returns true. As such, they +/// should return the appropriate thing (e.g. the node) back to the top-level of +/// the DAG combiner loop to avoid it being looked at. 
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue RHS) { @@ -11135,22 +11956,27 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, } SDValue Load; + // It is safe to replace the two loads if they have different alignments, + // but the new load must be the minimum (most restrictive) alignment of the + // inputs. + bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); + unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect), - // FIXME: Discards pointer and TBAA info. + // FIXME: Discards pointer and AA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), - LLD->isInvariant(), LLD->getAlignment()); + isInvariant, Alignment); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer and TBAA info. + // FIXME: Discards pointer and AA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), - LLD->isNonTemporal(), LLD->getAlignment()); + LLD->isNonTemporal(), isInvariant, Alignment); } // Users of the select now use the result of the load. @@ -11166,7 +11992,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, return false; } -/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, @@ -11458,7 +12284,7 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, return SDValue(); } -/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. +/// This is a stub for TargetLowering::SimplifySetCC. 
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, SDLoc DL, bool foldBooleans) { @@ -11467,10 +12293,10 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } -/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, -/// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +/// Given an ISD::SDIV node expressing a divide by constant, return +/// a DAG expression to select that will generate the same value by multiplying +/// by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildSDIV(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) @@ -11489,10 +12315,29 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { return S; } -/// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, -/// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a +/// DAG expression that will generate the same value by right shifting. +SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (!C->getAPIntValue()) + return SDValue(); + + std::vector<SDNode *> Built; + SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); + + for (SDNode *N : Built) + AddToWorklist(N); + return S; +} + +/// Given an ISD::UDIV node expressing a divide by constant, return a DAG +/// expression that will generate the same value by multiplying by a magic +/// number. 
+/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue DAGCombiner::BuildUDIV(SDNode *N) { ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); if (!C) @@ -11511,9 +12356,141 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) { return S; } -/// FindBaseOffset - Return true if base is a frame index, which is known not -// to alias with anything but itself. Provides base object and offset as -// results. +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + + unsigned Iterations = 0; + if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { + if (Iterations) { + // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) + // For the reciprocal, we need to find the zero of the function: + // F(X) = A X - 1 [which has a zero at X = 1/A] + // => + // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form + // does not require additional intermediate precision] + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FPOne = DAG.getConstantFP(1.0, VT); + + AddToWorklist(Est.getNode()); + + // Newton iterations: Est = Est + Est (1 - Arg * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst); + AddToWorklist(Est.getNode()); + } + } + return Est; + } + + return SDValue(); +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// 
X_{i+1} = X_i (1.5 - A X_i^2 / 2) +/// As a result, we precompute A/2 prior to the iteration loop. +SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, + unsigned Iterations) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue ThreeHalves = DAG.getConstantFP(1.5, VT); + + // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that + // this entire sequence requires only one FP constant. + SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg); + AddToWorklist(HalfArg.getNode()); + + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg); + AddToWorklist(HalfArg.getNode()); + + // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst); + AddToWorklist(Est.getNode()); + } + return Est; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) +SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, + unsigned Iterations) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue MinusThree = DAG.getConstantFP(-3.0, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, VT); + + // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf); + AddToWorklist(HalfEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FMUL, 
DL, VT, Est, Arg); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst); + AddToWorklist(Est.getNode()); + } + return Est; +} + +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + unsigned Iterations = 0; + bool UseOneConstNR = false; + if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { + AddToWorklist(Est.getNode()); + if (Iterations) { + Est = UseOneConstNR ? + BuildRsqrtNROneConst(Op, Est, Iterations) : + BuildRsqrtNRTwoConst(Op, Est, Iterations); + } + return Est; + } + + return SDValue(); +} + +/// Return true if base is a frame index, which is known not to alias with +/// anything but itself. Provides base object and offset as results. static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, const GlobalValue *&GV, const void *&CV) { // Assume it is a primitive operation. @@ -11549,8 +12526,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, return isa<FrameIndexSDNode>(Base); } -/// isAlias - Return true if there is any possibility that the two addresses -/// overlap. +/// Return true if there is any possibility that the two addresses overlap. bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { // If they are the same then they must be aliases. if (Op0->getBasePtr() == Op1->getBasePtr()) return true; @@ -11609,8 +12585,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { return false; } - bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : - TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 + ? 
CombinerGlobalAA + : DAG.getSubtarget().useAA(); #ifndef NDEBUG if (CombinerAAOnlyFunc.getNumOccurrences() && CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) @@ -11628,10 +12605,10 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { AliasAnalysis::AliasResult AAResult = AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), Overlap1, - UseTBAA ? Op0->getTBAAInfo() : nullptr), + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), AliasAnalysis::Location(Op1->getMemOperand()->getValue(), Overlap2, - UseTBAA ? Op1->getTBAAInfo() : nullptr)); + UseTBAA ? Op1->getAAInfo() : AAMDNodes())); if (AAResult == AliasAnalysis::NoAlias) return false; } @@ -11640,7 +12617,7 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { return true; } -/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, +/// Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallVectorImpl<SDValue> &Aliases) { @@ -11676,7 +12653,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, } // Don't bother if we've been before. - if (!Visited.insert(Chain.getNode())) + if (!Visited.insert(Chain.getNode()).second) continue; switch (Chain.getOpcode()) { @@ -11751,10 +12728,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // like register copies will interfere with trivial cases). 
SmallVector<const SDNode *, 16> Worklist; - for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(), - IE = Visited.end(); I != IE; ++I) - if (*I != OriginalChain.getNode()) - Worklist.push_back(*I); + for (const SDNode *N : Visited) + if (N != OriginalChain.getNode()) + Worklist.push_back(N); while (!Worklist.empty()) { const SDNode *M = Worklist.pop_back_val(); @@ -11765,7 +12741,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, for (SDNode::use_iterator UI = M->use_begin(), UIE = M->use_end(); UI != UIE; ++UI) - if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) { + if (UI.getUse().getValueType() == MVT::Other && + Visited.insert(*UI).second) { if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) { // We've not visited this use, and we care about it (it could have an // ordering dependency with the original node). @@ -11781,8 +12758,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, } } -/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking -/// for a better chain (aliasing node.) +/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain +/// (aliasing node.) SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. @@ -11801,11 +12778,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); } -// SelectionDAG::Combine - This is the entry point for the file. -// +/// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, CodeGenOpt::Level OptLevel) { - /// run - This is the main entry point to this class. - /// + /// This is the main entry point to this class. 
DAGCombiner(*this, AA, OptLevel).Run(Level); } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index ad75e916cefa..97fed230c536 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -40,12 +40,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -65,34 +65,32 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "isel" STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " - "target-independent selector"); + "target-independent selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " - "target-specific selector"); + "target-specific selector"); STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); -/// \brief Set CallLoweringInfo attribute flags based on a call instruction -/// and called function attributes. 
void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS, unsigned AttrIdx) { - isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); - isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); - isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); - isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); - isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); - isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); - isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); - isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); - Alignment = CS->getParamAlignment(AttrIdx); -} - -/// startNewBlock - Set the current block to which generated machine -/// instructions will be appended, and clear the local CSE map. -/// + IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); + IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} + +/// Set the current block to which generated machine instructions will be +/// appended, and clear the local CSE map. void FastISel::startNewBlock() { LocalValueMap.clear(); @@ -105,18 +103,19 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } -bool FastISel::LowerArguments() { +bool FastISel::lowerArguments() { if (!FuncInfo.CanLowerReturn) // Fallback to SDISel argument lowering code to deal with sret pointer // parameter. return false; - if (!FastLowerArguments()) + if (!fastLowerArguments()) return false; // Enter arguments into ValueMap for uses in non-entry BBs. 
for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), - E = FuncInfo.Fn->arg_end(); I != E; ++I) { + E = FuncInfo.Fn->arg_end(); + I != E; ++I) { DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I); assert(VI != LocalValueMap.end() && "Missed an argument?"); FuncInfo.ValueMap[I] = VI->second; @@ -128,22 +127,30 @@ void FastISel::flushLocalValueMap() { LocalValueMap.clear(); LastLocalValue = EmitStartPt; recomputeInsertPt(); + SavedInsertPt = FuncInfo.InsertPt; } -bool FastISel::hasTrivialKill(const Value *V) const { +bool FastISel::hasTrivialKill(const Value *V) { // Don't consider constants or arguments to have trivial kills. const Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; // No-op casts are trivially coalesced by fast-isel. - if (const CastInst *Cast = dyn_cast<CastInst>(I)) + if (const auto *Cast = dyn_cast<CastInst>(I)) if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && !hasTrivialKill(Cast->getOperand(0))) return false; + // Even the value might have only one use in the LLVM IR, it is possible that + // FastISel might fold the use into another instruction and now there is more + // than one use at the Machine Instruction level. + unsigned Reg = lookUpRegForValue(V); + if (Reg && !MRI.use_empty(Reg)) + return false; + // GEPs with all zero indices are trivially coalesced by fast-isel. - if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) + if (const auto *GEP = dyn_cast<GetElementPtrInst>(I)) if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) return false; @@ -176,7 +183,7 @@ unsigned FastISel::getRegForValue(const Value *V) { // Look up the value to see if we already have a register for it. 
unsigned Reg = lookUpRegForValue(V); - if (Reg != 0) + if (Reg) return Reg; // In bottom-up mode, just create the virtual register which will be used @@ -197,29 +204,24 @@ unsigned FastISel::getRegForValue(const Value *V) { return Reg; } -/// materializeRegForValue - Helper for getRegForValue. This function is -/// called when the value isn't already available in a register and must -/// be materialized with new instructions. -unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { +unsigned FastISel::materializeConstant(const Value *V, MVT VT) { unsigned Reg = 0; - - if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().getActiveBits() <= 64) - Reg = FastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); - } else if (isa<AllocaInst>(V)) { - Reg = TargetMaterializeAlloca(cast<AllocaInst>(V)); - } else if (isa<ConstantPointerNull>(V)) { + Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa<AllocaInst>(V)) + Reg = fastMaterializeAlloca(cast<AllocaInst>(V)); + else if (isa<ConstantPointerNull>(V)) // Translate this as an integer zero so that it can be // local-CSE'd with actual integer zeros. - Reg = - getRegForValue(Constant::getNullValue(DL.getIntPtrType(V->getContext()))); - } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - if (CF->isNullValue()) { - Reg = TargetMaterializeFloatZero(CF); - } else { + Reg = getRegForValue( + Constant::getNullValue(DL.getIntPtrType(V->getContext()))); + else if (const auto *CF = dyn_cast<ConstantFP>(V)) { + if (CF->isNullValue()) + Reg = fastMaterializeFloatZero(CF); + else // Try to emit the constant directly. - Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); - } + Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF); if (!Reg) { // Try to emit the constant by using an integer constant with a cast. 
@@ -229,22 +231,22 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { uint64_t x[2]; uint32_t IntBitWidth = IntVT.getSizeInBits(); bool isExact; - (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); + (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); if (isExact) { APInt IntVal(IntBitWidth, x); unsigned IntegerReg = - getRegForValue(ConstantInt::get(V->getContext(), IntVal)); + getRegForValue(ConstantInt::get(V->getContext(), IntVal)); if (IntegerReg != 0) - Reg = FastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, - IntegerReg, /*Kill=*/false); + Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, + /*Kill=*/false); } } - } else if (const Operator *Op = dyn_cast<Operator>(V)) { - if (!SelectOperator(Op, Op->getOpcode())) + } else if (const auto *Op = dyn_cast<Operator>(V)) { + if (!selectOperator(Op, Op->getOpcode())) if (!isa<Instruction>(Op) || - !TargetSelectInstruction(cast<Instruction>(Op))) + !fastSelectInstruction(cast<Instruction>(Op))) return 0; Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { @@ -252,15 +254,26 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } + return Reg; +} + +/// Helper for getRegForValue. This function is called when the value isn't +/// already available in a register and must be materialized with new +/// instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + // Give the target-specific code a try first. + if (isa<Constant>(V)) + Reg = fastMaterializeConstant(cast<Constant>(V)); - // If target-independent code couldn't handle the value, give target-specific - // code a try. 
- if (!Reg && isa<Constant>(V)) - Reg = TargetMaterializeConstant(cast<Constant>(V)); + // If target-specific code couldn't or didn't want to handle the value, then + // give target-independent code a try. + if (!Reg) + Reg = materializeConstant(V, VT); // Don't cache constant materializations in the general ValueMap. // To do so would require tracking what uses they dominate. - if (Reg != 0) { + if (Reg) { LocalValueMap[V] = Reg; LastLocalValue = MRI.getVRegDef(Reg); } @@ -278,13 +291,7 @@ unsigned FastISel::lookUpRegForValue(const Value *V) { return LocalValueMap[V]; } -/// UpdateValueMap - Update the value map to include the new mapping for this -/// instruction, or insert an extra copy to get the result in a previous -/// determined register. -/// NOTE: This is only necessary because we might select a block that uses -/// a value before we select the block that defines the value. It might be -/// possible to fix this by selecting blocks in reverse postorder. -void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { +void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; return; @@ -297,7 +304,7 @@ void FastISel::UpdateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { else if (Reg != AssignedReg) { // Arrange for uses of AssignedReg to be replaced by uses of Reg. 
for (unsigned i = 0; i < NumRegs; i++) - FuncInfo.RegFixups[AssignedReg+i] = Reg+i; + FuncInfo.RegFixups[AssignedReg + i] = Reg + i; AssignedReg = Reg; } @@ -315,13 +322,12 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { MVT PtrVT = TLI.getPointerTy(); EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); if (IdxVT.bitsLT(PtrVT)) { - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, - IdxN, IdxNIsKill); + IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN, + IdxNIsKill); IdxNIsKill = true; - } - else if (IdxVT.bitsGT(PtrVT)) { - IdxN = FastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, - IdxN, IdxNIsKill); + } else if (IdxVT.bitsGT(PtrVT)) { + IdxN = + fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill); IdxNIsKill = true; } return std::pair<unsigned, bool>(IdxN, IdxNIsKill); @@ -343,7 +349,7 @@ void FastISel::recomputeInsertPt() { void FastISel::removeDeadCode(MachineBasicBlock::iterator I, MachineBasicBlock::iterator E) { - assert (I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!"); while (I != E) { MachineInstr *Dead = &*I; ++I; @@ -358,7 +364,7 @@ FastISel::SavePoint FastISel::enterLocalValueArea() { DebugLoc OldDL = DbgLoc; recomputeInsertPt(); DbgLoc = DebugLoc(); - SavePoint SP = { OldInsertPt, OldDL }; + SavePoint SP = {OldInsertPt, OldDL}; return SP; } @@ -371,10 +377,7 @@ void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { DbgLoc = OldInsertPt.DL; } -/// SelectBinaryOp - Select and emit code for a binary operator instruction, -/// which has an opcode which directly corresponds to the given ISD opcode. -/// -bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { +bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); if (VT == MVT::Other || !VT.isSimple()) // Unhandled type. 
Halt "fast" selection and bail. @@ -387,9 +390,8 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { if (!TLI.isTypeLegal(VT)) { // MVT::i1 is special. Allow AND, OR, or XOR because they // don't require additional zeroing, which makes them easy. - if (VT == MVT::i1 && - (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || - ISDOpcode == ISD::XOR)) + if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) VT = TLI.getTypeToTransformTo(I->getContext(), VT); else return false; @@ -397,38 +399,36 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { // Check if the first operand is a constant, and handle it as "ri". At -O0, // we don't have anything that canonicalizes operand order. - if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0))) if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { unsigned Op1 = getRegForValue(I->getOperand(1)); - if (Op1 == 0) return false; - + if (!Op1) + return false; bool Op1IsKill = hasTrivialKill(I->getOperand(1)); - unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, - Op1IsKill, CI->getZExtValue(), - VT.getSimpleVT()); - if (ResultReg == 0) return false; + unsigned ResultReg = + fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill, + CI->getZExtValue(), VT.getSimpleVT()); + if (!ResultReg) + return false; // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } - unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. 
- if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { uint64_t Imm = CI->getZExtValue(); // Transform "sdiv exact X, 8" -> "sra X, 3". if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && - cast<BinaryOperator>(I)->isExact() && - isPowerOf2_64(Imm)) { + cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) { Imm = Log2_64(Imm); ISDOpcode = ISD::SRA; } @@ -440,54 +440,49 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::AND; } - unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); - if (ResultReg == 0) return false; + if (!ResultReg) + return false; // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } // Check if the second operand is a constant float. - if (ConstantFP *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { - unsigned ResultReg = FastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { + unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, CF); - if (ResultReg != 0) { + if (ResultReg) { // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } } unsigned Op1 = getRegForValue(I->getOperand(1)); - if (Op1 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!Op1) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op1IsKill = hasTrivialKill(I->getOperand(1)); // Now we have both operands in registers. Emit the instruction. 
- unsigned ResultReg = FastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, - Op0, Op0IsKill, - Op1, Op1IsKill); - if (ResultReg == 0) + unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); + if (!ResultReg) // Target-specific code wasn't able to find a machine opcode for // the given ISD opcode and type. Halt "fast" selection and bail. return false; // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool FastISel::SelectGetElementPtr(const User *I) { +bool FastISel::selectGetElementPtr(const User *I) { unsigned N = getRegForValue(I->getOperand(0)); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; - bool NIsKill = hasTrivialKill(I->getOperand(0)); // Keep a running tab of the total offset to coalesce multiple N = N + Offset @@ -497,18 +492,18 @@ bool FastISel::SelectGetElementPtr(const User *I) { uint64_t MaxOffs = 2048; Type *Ty = I->getOperand(0)->getType(); MVT VT = TLI.getPointerTy(); - for (GetElementPtrInst::const_op_iterator OI = I->op_begin()+1, - E = I->op_end(); OI != E; ++OI) { + for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1, + E = I->op_end(); + OI != E; ++OI) { const Value *Idx = *OI; - if (StructType *StTy = dyn_cast<StructType>(Ty)) { + if (auto *StTy = dyn_cast<StructType>(Ty)) { unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); if (Field) { // N = N + Offset TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); if (TotalOffs >= MaxOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. 
return false; NIsKill = true; TotalOffs = 0; @@ -519,15 +514,15 @@ bool FastISel::SelectGetElementPtr(const User *I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { - if (CI->isZero()) continue; + if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) + continue; // N = N + Offset TotalOffs += - DL.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); + DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); if (TotalOffs >= MaxOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; TotalOffs = 0; @@ -535,9 +530,8 @@ bool FastISel::SelectGetElementPtr(const User *I) { continue; } if (TotalOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; NIsKill = true; TotalOffs = 0; @@ -548,43 +542,37 @@ bool FastISel::SelectGetElementPtr(const User *I) { std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); unsigned IdxN = Pair.first; bool IdxNIsKill = Pair.second; - if (IdxN == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. return false; if (ElementSize != 1) { - IdxN = FastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); - if (IdxN == 0) - // Unhandled operand. Halt "fast" selection and bail. + IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT); + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. 
return false; IdxNIsKill = true; } - N = FastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; } } if (TotalOffs) { - N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); - if (N == 0) - // Unhandled operand. Halt "fast" selection and bail. + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; } // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, N); + updateValueMap(I, N); return true; } -/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands -/// to a stackmap or patchpoint machine instruction. bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, const CallInst *CI, unsigned StartIdx) { for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { Value *Val = CI->getArgOperand(i); // Check for constants and encode them with a StackMaps::ConstantOp prefix. 
- if (auto *C = dyn_cast<ConstantInt>(Val)) { + if (const auto *C = dyn_cast<ConstantInt>(Val)) { Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); Ops.push_back(MachineOperand::CreateImm(C->getSExtValue())); } else if (isa<ConstantPointerNull>(Val)) { @@ -601,16 +589,15 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, return false; } else { unsigned Reg = getRegForValue(Val); - if (Reg == 0) + if (!Reg) return false; Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); } } - return true; } -bool FastISel::SelectStackmap(const CallInst *I) { +bool FastISel::selectStackmap(const CallInst *I) { // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, // [live variables...]) assert(I->getCalledFunction()->getReturnType()->isVoidTy() && @@ -637,7 +624,7 @@ bool FastISel::SelectStackmap(const CallInst *I) { assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && "Expected a constant integer."); const auto *NumBytes = - cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); // Push live variables for the stack map (skipping the first two arguments @@ -653,13 +640,13 @@ bool FastISel::SelectStackmap(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) - .addImm(0); + .addImm(0); // Issue STACKMAP. 
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -670,7 +657,8 @@ bool FastISel::SelectStackmap(const CallInst *I) { // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) - .addImm(0).addImm(0); + .addImm(0) + .addImm(0); // Inform the Frame Information that we have a stackmap in this function. FuncInfo.MF->getFrameInfo()->setHasStackMap(); @@ -709,10 +697,10 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, : CI->getType(); CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs); - return LowerCallTo(CLI); + return lowerCallTo(CLI); } -bool FastISel::SelectPatchpoint(const CallInst *I) { +bool FastISel::selectPatchpoint(const CallInst *I) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, @@ -728,7 +716,7 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && "Expected a constant integer."); const auto *NumArgsVal = - cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)); + cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)); unsigned NumArgs = NumArgsVal->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> @@ -740,6 +728,7 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 
0 : NumArgs; CallLoweringInfo CLI; + CLI.setIsPatchPoint(); if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI)) return false; @@ -764,12 +753,12 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && "Expected a constant integer."); const auto *NumBytes = - cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); // Assume that the callee is a constant address or null pointer. // FIXME: handle function symbols in the future. - unsigned CalleeAddr; + uint64_t CalleeAddr; if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) CalleeAddr = cast<ConstantInt>(C->getOperand(0))->getZExtValue(); else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { @@ -818,8 +807,8 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); for (unsigned i = 0; ScratchRegs[i]; ++i) Ops.push_back(MachineOperand::CreateReg( - ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, - /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); // Add implicit defs (return values). 
for (auto Reg : CLI.InRegs) @@ -842,7 +831,7 @@ bool FastISel::SelectPatchpoint(const CallInst *I) { FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); if (CLI.NumResultRegs) - UpdateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); + updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); return true; } @@ -861,7 +850,7 @@ static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) { Attrs); } -bool FastISel::LowerCallTo(const CallInst *CI, const char *SymName, +bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName, unsigned NumArgs) { ImmutableCallSite CS(CI); @@ -889,10 +878,10 @@ bool FastISel::LowerCallTo(const CallInst *CI, const char *SymName, CallLoweringInfo CLI; CLI.setCallee(RetTy, FTy, SymName, std::move(Args), CS, NumArgs); - return LowerCallTo(CLI); + return lowerCallTo(CLI); } -bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { +bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // Handle the incoming return values from the call. CLI.clearIns(); SmallVector<EVT, 4> RetTys; @@ -901,9 +890,8 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI); - bool CanLowerReturn = TLI.CanLowerReturn(CLI.CallConv, *FuncInfo.MF, - CLI.IsVarArg, Outs, - CLI.RetTy->getContext()); + bool CanLowerReturn = TLI.CanLowerReturn( + CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); // FIXME: sret demotion isn't supported yet - bail out. 
if (!CanLowerReturn) @@ -932,23 +920,23 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { CLI.clearOuts(); for (auto &Arg : CLI.getArgs()) { Type *FinalType = Arg.Ty; - if (Arg.isByVal) + if (Arg.IsByVal) FinalType = cast<PointerType>(Arg.Ty)->getElementType(); bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( - FinalType, CLI.CallConv, CLI.IsVarArg); + FinalType, CLI.CallConv, CLI.IsVarArg); ISD::ArgFlagsTy Flags; - if (Arg.isZExt) + if (Arg.IsZExt) Flags.setZExt(); - if (Arg.isSExt) + if (Arg.IsSExt) Flags.setSExt(); - if (Arg.isInReg) + if (Arg.IsInReg) Flags.setInReg(); - if (Arg.isSRet) + if (Arg.IsSRet) Flags.setSRet(); - if (Arg.isByVal) + if (Arg.IsByVal) Flags.setByVal(); - if (Arg.isInAlloca) { + if (Arg.IsInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about // inalloca. This way we can know how many bytes we should've allocated @@ -957,7 +945,7 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { // the various CC lowering callbacks. Flags.setByVal(); } - if (Arg.isByVal || Arg.isInAlloca) { + if (Arg.IsByVal || Arg.IsInAlloca) { PointerType *Ty = cast<PointerType>(Arg.Ty); Type *ElementTy = Ty->getElementType(); unsigned FrameSize = DL.getTypeAllocSize(ElementTy); @@ -969,7 +957,7 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { Flags.setByValSize(FrameSize); Flags.setByValAlign(FrameAlign); } - if (Arg.isNest) + if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); @@ -980,7 +968,7 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { CLI.OutFlags.push_back(Flags); } - if (!FastLowerCall(CLI)) + if (!fastLowerCall(CLI)) return false; // Set all unused physreg defs as dead. 
@@ -988,12 +976,12 @@ bool FastISel::LowerCallTo(CallLoweringInfo &CLI) { CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); if (CLI.NumResultRegs && CLI.CS) - UpdateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); return true; } -bool FastISel::LowerCall(const CallInst *CI) { +bool FastISel::lowerCall(const CallInst *CI) { ImmutableCallSite CS(CI); PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); @@ -1021,19 +1009,19 @@ bool FastISel::LowerCall(const CallInst *CI) { } // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within FastLowerCall. + // Target-dependent constraints are checked within fastLowerCall. bool IsTailCall = CI->isTailCall(); if (IsTailCall && !isInTailCallPosition(CS, TM)) IsTailCall = false; CallLoweringInfo CLI; CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) - .setTailCall(IsTailCall); + .setTailCall(IsTailCall); - return LowerCallTo(CLI); + return lowerCallTo(CLI); } -bool FastISel::SelectCall(const User *I) { +bool FastISel::selectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); // Handle simple inline asms. @@ -1055,8 +1043,8 @@ bool FastISel::SelectCall(const User *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) - .addExternalSymbol(IA->getAsmString().c_str()) - .addImm(ExtraInfo); + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); return true; } @@ -1065,7 +1053,7 @@ bool FastISel::SelectCall(const User *I) { // Handle intrinsic function calls. 
if (const auto *II = dyn_cast<IntrinsicInst>(Call)) - return SelectIntrinsicCall(II); + return selectIntrinsicCall(II); // Usually, it does not make sense to initialize a value, // make an unrelated function call and use the value, because @@ -1076,12 +1064,13 @@ bool FastISel::SelectCall(const User *I) { // since they tend to be inlined. flushLocalValueMap(); - return LowerCall(Call); + return lowerCall(Call); } -bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { +bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { switch (II->getIntrinsicID()) { - default: break; + default: + break; // At -O0 we don't care about the lifetime intrinsics. case Intrinsic::lifetime_start: case Intrinsic::lifetime_end: @@ -1106,7 +1095,7 @@ bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { unsigned Offset = 0; Optional<MachineOperand> Op; - if (const Argument *Arg = dyn_cast<Argument>(Address)) + if (const auto *Arg = dyn_cast<Argument>(Address)) // Some arguments' frame index is recorded during argument lowering. Offset = FuncInfo.getArgumentFrameIndex(Arg); if (Offset) @@ -1137,13 +1126,14 @@ bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { Op->setIsDebug(true); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, - DI->getVariable()); + DI->getVariable(), DI->getExpression()); } else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op) - .addImm(0) - .addMetadata(DI->getVariable()); + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1160,26 +1150,34 @@ bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(0U).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); - } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + .addReg(0U) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addCImm(CI).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + .addCImm(CI) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addImm(CI->getZExtValue()).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); - } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + .addImm(CI->getZExtValue()) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addFPImm(CF).addImm(DI->getOffset()) - .addMetadata(DI->getVariable()); + .addFPImm(CF) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = DI->getOffset() != 0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, - Reg, DI->getOffset(), DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, + DI->getOffset(), DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1192,34 +1190,34 @@ bool FastISel::SelectIntrinsicCall(const IntrinsicInst *II) { unsigned long long Res = CI->isZero() ? 
-1ULL : 0; Constant *ResCI = ConstantInt::get(II->getType(), Res); unsigned ResultReg = getRegForValue(ResCI); - if (ResultReg == 0) + if (!ResultReg) return false; - UpdateValueMap(II, ResultReg); + updateValueMap(II, ResultReg); return true; } case Intrinsic::expect: { unsigned ResultReg = getRegForValue(II->getArgOperand(0)); - if (ResultReg == 0) + if (!ResultReg) return false; - UpdateValueMap(II, ResultReg); + updateValueMap(II, ResultReg); return true; } case Intrinsic::experimental_stackmap: - return SelectStackmap(II); + return selectStackmap(II); case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - return SelectPatchpoint(II); + return selectPatchpoint(II); } - return FastLowerIntrinsicCall(II); + return fastLowerIntrinsicCall(II); } -bool FastISel::SelectCast(const User *I, unsigned Opcode) { +bool FastISel::selectCast(const User *I, unsigned Opcode) { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); - if (SrcVT == MVT::Other || !SrcVT.isSimple() || - DstVT == MVT::Other || !DstVT.isSimple()) + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || + !DstVT.isSimple()) // Unhandled type. Halt "fast" selection and bail. return false; @@ -1238,24 +1236,22 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) { bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); - unsigned ResultReg = FastEmit_r(SrcVT.getSimpleVT(), - DstVT.getSimpleVT(), - Opcode, - InputReg, InputRegIsKill); + unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + Opcode, InputReg, InputRegIsKill); if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool FastISel::SelectBitCast(const User *I) { +bool FastISel::selectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. 
if (I->getType() == I->getOperand(0)->getType()) { unsigned Reg = getRegForValue(I->getOperand(0)); - if (Reg == 0) + if (!Reg) return false; - UpdateValueMap(I, Reg); + updateValueMap(I, Reg); return true; } @@ -1270,17 +1266,15 @@ bool FastISel::SelectBitCast(const User *I) { MVT SrcVT = SrcEVT.getSimpleVT(); MVT DstVT = DstEVT.getSimpleVT(); unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; - bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; if (SrcVT == DstVT) { - const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); - const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); + const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. if (SrcClass == DstClass) { ResultReg = createResultReg(DstClass); @@ -1291,28 +1285,27 @@ bool FastISel::SelectBitCast(const User *I) { // If the reg-reg copy failed, select a BITCAST opcode. if (!ResultReg) - ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); + ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool -FastISel::SelectInstruction(const Instruction *I) { +bool FastISel::selectInstruction(const Instruction *I) { // Just before the terminator instruction, insert instructions to // feed PHI nodes in successor blocks. 
if (isa<TerminatorInst>(I)) - if (!HandlePHINodesInSuccessorBlocks(I->getParent())) + if (!handlePHINodesInSuccessorBlocks(I->getParent())) return false; DbgLoc = I->getDebugLoc(); - MachineBasicBlock::iterator SavedInsertPt = FuncInfo.InsertPt; + SavedInsertPt = FuncInfo.InsertPt; - if (const CallInst *Call = dyn_cast<CallInst>(I)) { + if (const auto *Call = dyn_cast<CallInst>(I)) { const Function *F = Call->getCalledFunction(); LibFunc::Func Func; @@ -1330,40 +1323,39 @@ FastISel::SelectInstruction(const Instruction *I) { } // First, try doing target-independent selection. - if (SelectOperator(I, I->getOpcode())) { - ++NumFastIselSuccessIndependent; - DbgLoc = DebugLoc(); - return true; - } - // Remove dead code. However, ignore call instructions since we've flushed - // the local value map and recomputed the insert point. - if (!isa<CallInst>(I)) { + if (!SkipTargetIndependentISel) { + if (selectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; + DbgLoc = DebugLoc(); + return true; + } + // Remove dead code. recomputeInsertPt(); if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + SavedInsertPt = FuncInfo.InsertPt; } - // Next, try calling the target to attempt to handle the instruction. - SavedInsertPt = FuncInfo.InsertPt; - if (TargetSelectInstruction(I)) { + if (fastSelectInstruction(I)) { ++NumFastIselSuccessTarget; DbgLoc = DebugLoc(); return true; } - // Check for dead code and remove as necessary. + // Remove dead code. recomputeInsertPt(); if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); DbgLoc = DebugLoc(); + // Undo phi node updates, because they will be added again by SelectionDAG. + if (isa<TerminatorInst>(I)) + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } -/// FastEmitBranch - Emit an unconditional branch to the given block, -/// unless it is the immediate (fall-through) successor, and update -/// the CFG. 
-void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { +/// Emit an unconditional branch to the given block, unless it is the immediate +/// (fall-through) successor, and update the CFG. +void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -1381,54 +1373,51 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); } -/// SelectFNeg - Emit an FNeg operation. -/// -bool -FastISel::SelectFNeg(const User *I) { +/// Emit an FNeg operation. +bool FastISel::selectFNeg(const User *I) { unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); - if (OpReg == 0) return false; - + if (!OpReg) + return false; bool OpRegIsKill = hasTrivialKill(I); // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(I->getType()); - unsigned ResultReg = FastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), - ISD::FNEG, OpReg, OpRegIsKill); - if (ResultReg != 0) { - UpdateValueMap(I, ResultReg); + unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, + OpReg, OpRegIsKill); + if (ResultReg) { + updateValueMap(I, ResultReg); return true; } // Bitcast the value to integer, twiddle the sign bit with xor, // and then bitcast it back to floating-point. 
- if (VT.getSizeInBits() > 64) return false; + if (VT.getSizeInBits() > 64) + return false; EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); if (!TLI.isTypeLegal(IntVT)) return false; - unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), ISD::BITCAST, OpReg, OpRegIsKill); - if (IntReg == 0) + if (!IntReg) return false; - unsigned IntResultReg = FastEmit_ri_(IntVT.getSimpleVT(), ISD::XOR, - IntReg, /*Kill=*/true, - UINT64_C(1) << (VT.getSizeInBits()-1), - IntVT.getSimpleVT()); - if (IntResultReg == 0) + unsigned IntResultReg = fastEmit_ri_( + IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true, + UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); + if (!IntResultReg) return false; - ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), - ISD::BITCAST, IntResultReg, /*Kill=*/true); - if (ResultReg == 0) + ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST, + IntResultReg, /*IsKill=*/true); + if (!ResultReg) return false; - UpdateValueMap(I, ResultReg); + updateValueMap(I, ResultReg); return true; } -bool -FastISel::SelectExtractValue(const User *U) { +bool FastISel::selectExtractValue(const User *U) { const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); if (!EVI) return false; @@ -1464,55 +1453,54 @@ FastISel::SelectExtractValue(const User *U) { for (unsigned i = 0; i < VTIndex; i++) ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); - UpdateValueMap(EVI, ResultReg); + updateValueMap(EVI, ResultReg); return true; } -bool -FastISel::SelectOperator(const User *I, unsigned Opcode) { +bool FastISel::selectOperator(const User *I, unsigned Opcode) { switch (Opcode) { case Instruction::Add: - return SelectBinaryOp(I, ISD::ADD); + return selectBinaryOp(I, ISD::ADD); case Instruction::FAdd: - return SelectBinaryOp(I, ISD::FADD); + return selectBinaryOp(I, ISD::FADD); case Instruction::Sub: - 
return SelectBinaryOp(I, ISD::SUB); + return selectBinaryOp(I, ISD::SUB); case Instruction::FSub: // FNeg is currently represented in LLVM IR as a special case of FSub. if (BinaryOperator::isFNeg(I)) - return SelectFNeg(I); - return SelectBinaryOp(I, ISD::FSUB); + return selectFNeg(I); + return selectBinaryOp(I, ISD::FSUB); case Instruction::Mul: - return SelectBinaryOp(I, ISD::MUL); + return selectBinaryOp(I, ISD::MUL); case Instruction::FMul: - return SelectBinaryOp(I, ISD::FMUL); + return selectBinaryOp(I, ISD::FMUL); case Instruction::SDiv: - return SelectBinaryOp(I, ISD::SDIV); + return selectBinaryOp(I, ISD::SDIV); case Instruction::UDiv: - return SelectBinaryOp(I, ISD::UDIV); + return selectBinaryOp(I, ISD::UDIV); case Instruction::FDiv: - return SelectBinaryOp(I, ISD::FDIV); + return selectBinaryOp(I, ISD::FDIV); case Instruction::SRem: - return SelectBinaryOp(I, ISD::SREM); + return selectBinaryOp(I, ISD::SREM); case Instruction::URem: - return SelectBinaryOp(I, ISD::UREM); + return selectBinaryOp(I, ISD::UREM); case Instruction::FRem: - return SelectBinaryOp(I, ISD::FREM); + return selectBinaryOp(I, ISD::FREM); case Instruction::Shl: - return SelectBinaryOp(I, ISD::SHL); + return selectBinaryOp(I, ISD::SHL); case Instruction::LShr: - return SelectBinaryOp(I, ISD::SRL); + return selectBinaryOp(I, ISD::SRL); case Instruction::AShr: - return SelectBinaryOp(I, ISD::SRA); + return selectBinaryOp(I, ISD::SRA); case Instruction::And: - return SelectBinaryOp(I, ISD::AND); + return selectBinaryOp(I, ISD::AND); case Instruction::Or: - return SelectBinaryOp(I, ISD::OR); + return selectBinaryOp(I, ISD::OR); case Instruction::Xor: - return SelectBinaryOp(I, ISD::XOR); + return selectBinaryOp(I, ISD::XOR); case Instruction::GetElementPtr: - return SelectGetElementPtr(I); + return selectGetElementPtr(I); case Instruction::Br: { const BranchInst *BI = cast<BranchInst>(I); @@ -1520,7 +1508,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { if 
(BI->isUnconditional()) { const BasicBlock *LLVMSucc = BI->getSuccessor(0); MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc]; - FastEmitBranch(MSucc, BI->getDebugLoc()); + fastEmitBranch(MSucc, BI->getDebugLoc()); return true; } @@ -1531,7 +1519,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { case Instruction::Unreachable: if (TM.Options.TrapUnreachable) - return FastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; + return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0; else return true; @@ -1544,38 +1532,39 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { return false; case Instruction::Call: - return SelectCall(I); + return selectCall(I); case Instruction::BitCast: - return SelectBitCast(I); + return selectBitCast(I); case Instruction::FPToSI: - return SelectCast(I, ISD::FP_TO_SINT); + return selectCast(I, ISD::FP_TO_SINT); case Instruction::ZExt: - return SelectCast(I, ISD::ZERO_EXTEND); + return selectCast(I, ISD::ZERO_EXTEND); case Instruction::SExt: - return SelectCast(I, ISD::SIGN_EXTEND); + return selectCast(I, ISD::SIGN_EXTEND); case Instruction::Trunc: - return SelectCast(I, ISD::TRUNCATE); + return selectCast(I, ISD::TRUNCATE); case Instruction::SIToFP: - return SelectCast(I, ISD::SINT_TO_FP); + return selectCast(I, ISD::SINT_TO_FP); case Instruction::IntToPtr: // Deliberate fall-through. 
case Instruction::PtrToInt: { EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); EVT DstVT = TLI.getValueType(I->getType()); if (DstVT.bitsGT(SrcVT)) - return SelectCast(I, ISD::ZERO_EXTEND); + return selectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) - return SelectCast(I, ISD::TRUNCATE); + return selectCast(I, ISD::TRUNCATE); unsigned Reg = getRegForValue(I->getOperand(0)); - if (Reg == 0) return false; - UpdateValueMap(I, Reg); + if (!Reg) + return false; + updateValueMap(I, Reg); return true; } case Instruction::ExtractValue: - return SelectExtractValue(I); + return selectExtractValue(I); case Instruction::PHI: llvm_unreachable("FastISel shouldn't visit PHI nodes!"); @@ -1586,91 +1575,72 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(FunctionLoweringInfo &funcInfo, - const TargetLibraryInfo *libInfo) - : FuncInfo(funcInfo), - MF(funcInfo.MF), - MRI(FuncInfo.MF->getRegInfo()), - MFI(*FuncInfo.MF->getFrameInfo()), - MCP(*FuncInfo.MF->getConstantPool()), - TM(FuncInfo.MF->getTarget()), - DL(*TM.getDataLayout()), - TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()), - TRI(*TM.getRegisterInfo()), - LibInfo(libInfo) { -} +FastISel::FastISel(FunctionLoweringInfo &FuncInfo, + const TargetLibraryInfo *LibInfo, + bool SkipTargetIndependentISel) + : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()), + MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), + TM(FuncInfo.MF->getTarget()), DL(*MF->getSubtarget().getDataLayout()), + TII(*MF->getSubtarget().getInstrInfo()), + TLI(*MF->getSubtarget().getTargetLowering()), + TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo), + SkipTargetIndependentISel(SkipTargetIndependentISel) {} FastISel::~FastISel() {} -bool FastISel::FastLowerArguments() { - return false; -} +bool FastISel::fastLowerArguments() { return false; } -bool FastISel::FastLowerCall(CallLoweringInfo &/*CLI*/) { - return false; -} +bool 
FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; } -bool FastISel::FastLowerIntrinsicCall(const IntrinsicInst * /*II*/) { +bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) { return false; } -unsigned FastISel::FastEmit_(MVT, MVT, - unsigned) { - return 0; -} +unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; } -unsigned FastISel::FastEmit_r(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/) { +unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/) { return 0; } -unsigned FastISel::FastEmit_rr(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, - unsigned /*Op1*/, bool /*Op1IsKill*/) { +unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, unsigned /*Op1*/, + bool /*Op1IsKill*/) { return 0; } -unsigned FastISel::FastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_f(MVT, MVT, - unsigned, const ConstantFP * /*FPImm*/) { +unsigned FastISel::fastEmit_f(MVT, MVT, unsigned, + const ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_ri(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, - uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, uint64_t /*Imm*/) { return 0; } -unsigned FastISel::FastEmit_rf(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, +unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, const ConstantFP * /*FPImm*/) { return 0; } -unsigned FastISel::FastEmit_rri(MVT, MVT, - unsigned, - unsigned /*Op0*/, bool /*Op0IsKill*/, - unsigned /*Op1*/, bool /*Op1IsKill*/, - uint64_t /*Imm*/) { +unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/, + bool /*Op0IsKill*/, unsigned /*Op1*/, + bool /*Op1IsKill*/, uint64_t /*Imm*/) { return 0; } -/// FastEmit_ri_ - This 
method is a wrapper of FastEmit_ri. It first tries -/// to emit an instruction with an immediate operand using FastEmit_ri. +/// This method is a wrapper of fastEmit_ri. It first tries to emit an +/// instruction with an immediate operand using fastEmit_ri. /// If that fails, it materializes the immediate into a register and try -/// FastEmit_rr instead. -unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, - unsigned Op0, bool Op0IsKill, - uint64_t Imm, MVT ImmType) { +/// fastEmit_rr instead. +unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, + bool Op0IsKill, uint64_t Imm, MVT ImmType) { // If this is a multiply by a power of two, emit this as a shift left. if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { Opcode = ISD::SHL; @@ -1688,30 +1658,29 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, return 0; // First check if immediate type is legal. If not, we can't use the ri form. - unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); - if (ResultReg != 0) + unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); + if (ResultReg) return ResultReg; - unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - if (MaterialReg == 0) { + unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of // fast-isel, which would be very slow. 
- IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), - VT.getSizeInBits()); + IntegerType *ITy = + IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); - assert (MaterialReg != 0 && "Unable to materialize imm."); - if (MaterialReg == 0) return 0; + if (!MaterialReg) + return 0; } - return FastEmit_rr(VT, VT, Opcode, - Op0, Op0IsKill, - MaterialReg, /*Kill=*/true); + return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, + /*IsKill=*/true); } -unsigned FastISel::createResultReg(const TargetRegisterClass* RC) { +unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { return MRI.createVirtualRegister(RC); } -unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, - unsigned Op, unsigned OpNum) { +unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, + unsigned OpNum) { if (TargetRegisterInfo::isVirtualRegister(Op)) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); @@ -1727,8 +1696,8 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, return Op; } -unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, - const TargetRegisterClass* RC) { +unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); @@ -1736,9 +1705,9 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill) { +unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1746,10 +1715,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, if 
(II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } @@ -1757,10 +1726,10 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill) { +unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1769,23 +1738,23 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - unsigned Op2, bool Op2IsKill) { +unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, 
unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, unsigned Op2, + bool Op2IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1795,48 +1764,46 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addReg(Op2, Op2IsKill * RegState::Kill); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); - RC = TII.getRegClass(II, II.getNumDefs(), &TRI, *FuncInfo.MF); - MRI.constrainRegClass(Op0, RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, 
getKillRegState(Op0IsKill)) + .addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - uint64_t Imm1, uint64_t Imm2) { +unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm1, + uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1844,24 +1811,23 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm1) - .addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Imm1) - .addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm) { +unsigned FastISel::fastEmitInst_rf(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1869,23 +1835,22 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addFPImm(FPImm); + .addReg(Op0, getKillRegState(Op0IsKill)) + 
.addFPImm(FPImm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addFPImm(FPImm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addFPImm(FPImm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); unsigned ResultReg = createResultReg(RC); @@ -1894,25 +1859,25 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, +unsigned FastISel::fastEmitInst_rrii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, - unsigned Op0, bool Op0IsKill, - unsigned Op1, bool Op1IsKill, - uint64_t Imm1, uint64_t Imm2) { + unsigned Op0, bool Op0IsKill, unsigned Op1, + bool Op1IsKill, uint64_t Imm1, + uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); 
unsigned ResultReg = createResultReg(RC); @@ -1921,28 +1886,30 @@ unsigned FastISel::FastEmitInst_rrii(unsigned MachineInstOpcode, if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm1).addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm1) + .addImm(Imm2); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addReg(Op1, Op1IsKill * RegState::Kill) - .addImm(Imm1).addImm(Imm2); + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm1) + .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm) { +unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, uint64_t Imm) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addImm(Imm); else { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, @@ -1951,41 +1918,41 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm1, uint64_t Imm2) { +unsigned FastISel::fastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, uint64_t Imm1, + uint64_t Imm2) { unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); if 
(II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) - .addImm(Imm1).addImm(Imm2); + .addImm(Imm1) + .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm1) + .addImm(Imm2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } -unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, - unsigned Op0, bool Op0IsKill, - uint32_t Idx) { +unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, + bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(TargetRegisterInfo::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(TargetOpcode::COPY), ResultReg) - .addReg(Op0, getKillRegState(Op0IsKill), Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } -/// FastEmitZExtFromI1 - Emit MachineInstrs to compute the value of Op -/// with all but the least significant bit set to zero. -unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { - return FastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +/// Emit MachineInstrs to compute the value of Op with all but the least +/// significant bit set to zero. +unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); } /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. @@ -1994,22 +1961,24 @@ unsigned FastISel::FastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { /// nodes as input. 
We cannot just directly add them, because expansion /// might result in multiple MBB's for one BB. As such, the start of the /// BB might correspond to a different MBB than the end. -bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { +bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; - unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); + FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); - if (!isa<PHINode>(SuccBB->begin())) continue; + if (!isa<PHINode>(SuccBB->begin())) + continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. - if (!SuccsHandled.insert(SuccMBB)) continue; + if (!SuccsHandled.insert(SuccMBB).second) + continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); @@ -2017,10 +1986,11 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. for (BasicBlock::const_iterator I = SuccBB->begin(); - const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + const auto *PN = dyn_cast<PHINode>(I); ++I) { // Ignore dead phi's. - if (PN->use_empty()) continue; + if (PN->use_empty()) + continue; // Only handle legal types. Two interesting things to note here. 
First, // by bailing out early, we may leave behind some dead instructions, @@ -2031,10 +2001,8 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. - if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) - VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); - else { - FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } } @@ -2044,12 +2012,12 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise. DbgLoc = PN->getDebugLoc(); - if (const Instruction *Inst = dyn_cast<Instruction>(PHIOp)) + if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) DbgLoc = Inst->getDebugLoc(); unsigned Reg = getRegForValue(PHIOp); - if (Reg == 0) { - FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + if (!Reg) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); @@ -2062,17 +2030,17 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { assert(LI->hasOneUse() && - "tryToFoldLoad expected a LoadInst with a single use"); + "tryToFoldLoad expected a LoadInst with a single use"); // We know that the load has a single use, but don't know what it is. If it // isn't one of the folded instructions, then we can't succeed here. Handle // this by scanning the single-use users of the load until we get to FoldInst. 
- unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. const Instruction *TheUser = LI->user_back(); - while (TheUser != FoldInst && // Scan up until we find FoldInst. + while (TheUser != FoldInst && // Scan up until we find FoldInst. // Stay in the right block. TheUser->getParent() == FoldInst->getParent() && - --MaxUsers) { // Don't scan too far. + --MaxUsers) { // Don't scan too far. // If there are multiple or no uses of this instruction, then bail out. if (!TheUser->hasOneUse()) return false; @@ -2094,7 +2062,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // then there actually was no reference to it. Perhaps the load is referenced // by a dead instruction. unsigned LoadReg = getRegForValue(LI); - if (LoadReg == 0) + if (!LoadReg) return false; // We can't fold if this vreg has no uses or more than one use. Multiple uses @@ -2152,19 +2120,20 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags = MachineMemOperand::MOStore; Ptr = SI->getPointerOperand(); ValTy = SI->getValueOperand()->getType(); - } else { + } else return nullptr; - } - bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr; - bool IsInvariant = I->getMetadata("invariant.load") != nullptr; - const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa); + bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; + bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); - if (Alignment == 0) // Ensure that codegen never sees alignment 0. + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. 
Alignment = DL.getABITypeAlignment(ValTy); - unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy); + unsigned Size = DL.getTypeStoreSize(ValTy); if (IsVolatile) Flags |= MachineMemOperand::MOVolatile; @@ -2174,5 +2143,45 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags |= MachineMemOperand::MOInvariant; return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, - Alignment, TBAAInfo, Ranges); + Alignment, AAInfo, Ranges); +} + +CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const { + // If both operands are the same, then try to optimize or fold the cmp. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (CI->getOperand(0) != CI->getOperand(1)) + return Predicate; + + switch (Predicate) { + default: llvm_unreachable("Invalid predicate!"); + case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; + + case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; 
break; + case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break; + } + + return Predicate; } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index ae124e852a91..19aca6edd451 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -36,6 +36,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -55,58 +56,71 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { return false; } +static ISD::NodeType getPreferredExtendForValue(const Value *V) { + // For the users of the source value being used for compare instruction, if + // the number of signed predicate is greater than unsigned predicate, we + // prefer to use SIGN_EXTEND. + // + // With this optimization, we would be able to reduce some redundant sign or + // zero extension instruction, and eventually more machine CSE opportunities + // can be exposed. 
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + unsigned NumOfSigned = 0, NumOfUnsigned = 0; + for (const User *U : V->users()) { + if (const auto *CI = dyn_cast<CmpInst>(U)) { + NumOfSigned += CI->isSigned(); + NumOfUnsigned += CI->isUnsigned(); + } + } + if (NumOfSigned > NumOfUnsigned) + ExtendKind = ISD::SIGN_EXTEND; + + return ExtendKind; +} + void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, SelectionDAG *DAG) { - const TargetLowering *TLI = TM.getTargetLowering(); - Fn = &fn; MF = &mf; + TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); // Check whether the function can return without sret-demotion. SmallVector<ISD::OutputArg, 4> Outs; GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI); CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, - Fn->isVarArg(), - Outs, Fn->getContext()); + Fn->isVarArg(), Outs, Fn->getContext()); // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. Function::const_iterator BB = Fn->begin(), EB = Fn->end(); - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - // Don't fold inalloca allocas or other dynamic allocas into the initial - // stack frame allocation, even if they are in the entry block. - if (!AI->isStaticAlloca()) - continue; - - if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) { - Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), - AI->getAlignment()); - - TySize *= CUI->getZExtValue(); // Get total allocated size. - if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. 
- - StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); - } - } - for (; BB != EB; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // Look for dynamic allocas. if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { - if (!AI->isStaticAlloca()) { + // Static allocas can be folded into the initial stack frame adjustment. + if (AI->isStaticAlloca()) { + const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); + Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + + } else { unsigned Align = std::max( (unsigned)TLI->getDataLayout()->getPrefTypeAlignment( AI->getAllocatedType()), AI->getAlignment()); - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + MF->getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; // Inform the Frame Information that we have variable-sized objects. @@ -126,9 +140,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). 
TLI->ComputeConstraintToUse(Op, SDValue(), DAG); - std::pair<unsigned, const TargetRegisterClass*> PhysReg = - TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, - Op.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> PhysReg = + TLI->getRegForInlineAsmConstraint(Op.ConstraintCode, + Op.ConstraintVT); if (PhysReg.first == SP) MF->getFrameInfo()->setHasInlineAsmWithSPAdjust(true); } @@ -136,6 +150,21 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } + // Look for calls to the @llvm.va_start intrinsic. We can omit some + // prologue boilerplate for variadic functions that don't examine their + // arguments. + if (const auto *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::vastart) + MF->getFrameInfo()->setHasVAStart(true); + } + + // If we have a musttail call in a variadic funciton, we need to ensure we + // forward implicit register parameters. + if (const auto *CI = dyn_cast<CallInst>(I)) { + if (CI->isMustTailCall() && Fn->isVarArg()) + MF->getFrameInfo()->setHasMustTailInVarArgFunc(true); + } + // Mark values used outside their block as exported, by allocating // a virtual register for them. if (isUsedOutsideOfDefiningBlock(I)) @@ -166,13 +195,16 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, StaticAllocaMap.find(AI); if (SI != StaticAllocaMap.end()) { // Check for VLAs. int FI = SI->second; - MMI.setVariableDbgInfo(DI->getVariable(), + MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(), FI, DI->getDebugLoc()); } } } } } + + // Decide the preferred extend type for a value. + PreferredExtendType[I] = getPreferredExtendForValue(I); } // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. 
This @@ -208,7 +240,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); for (unsigned i = 0; i != NumRegisters; ++i) BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); PHIReg += NumRegisters; @@ -241,12 +273,14 @@ void FunctionLoweringInfo::clear() { ArgDbgValues.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); + StatepointStackSlots.clear(); + PreferredExtendType.clear(); } /// CreateReg - Allocate a single virtual register for the given type. unsigned FunctionLoweringInfo::CreateReg(MVT VT) { - return RegInfo-> - createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT)); + return RegInfo->createVirtualRegister( + MF->getSubtarget().getTargetLowering()->getRegClassFor(VT)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -257,7 +291,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) { /// will assign registers for each member or element. /// unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); @@ -306,8 +340,6 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { if (!Ty->isIntegerTy() || Ty->isVectorTy()) return; - const TargetLowering *TLI = TM.getTargetLowering(); - SmallVector<EVT, 1> ValueVTs; ComputeValueVTs(*TLI, Ty, ValueVTs); assert(ValueVTs.size() == 1 && @@ -452,7 +484,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. 
- std::vector<const GlobalVariable *> TyInfo; + std::vector<const GlobalValue *> TyInfo; unsigned N = I.getNumArgOperands(); for (unsigned i = N - 1; i > 1; --i) { @@ -510,14 +542,14 @@ void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, Value *Val = I.getClause(i - 1); if (I.isCatch(i - 1)) { MMI.addCatchTypeInfo(MBB, - dyn_cast<GlobalVariable>(Val->stripPointerCasts())); + dyn_cast<GlobalValue>(Val->stripPointerCasts())); } else { // Add filters in a list. Constant *CVal = cast<Constant>(Val); - SmallVector<const GlobalVariable*, 4> FilterList; + SmallVector<const GlobalValue*, 4> FilterList; for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II) - FilterList.push_back(cast<GlobalVariable>((*II)->stripPointerCasts())); + FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); MMI.addFilterTypeInfo(MBB, FilterList); } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 7c124b8caa91..a65f33e17774 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -27,7 +27,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; #define DEBUG_TYPE "instr-emitter" @@ -265,12 +265,16 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MIB.addReg(VRBase, RegState::Define); } - SDValue Op(Node, i); - if (IsClone) - VRBaseMap.erase(Op); - bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; - (void)isNew; // Silence compiler warning. - assert(isNew && "Node emitted out of order - early"); + // If this def corresponds to a result of the SDNode insert the VRBase into + // the lookup map. 
+ if (i < NumResults) { + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } } } @@ -402,10 +406,10 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, Type *Type = CP->getType(); // MachineConstantPool wants an explicit alignment. if (Align == 0) { - Align = TM->getDataLayout()->getPrefTypeAlignment(Type); + Align = MF->getSubtarget().getDataLayout()->getPrefTypeAlignment(Type); if (Align == 0) { // Alignment of vector types. FIXME! - Align = TM->getDataLayout()->getTypeAllocSize(Type); + Align = MF->getSubtarget().getDataLayout()->getTypeAllocSize(Type); } } @@ -643,14 +647,18 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap) { uint64_t Offset = SD->getOffset(); - MDNode* MDPtr = SD->getMDPtr(); + MDNode *Var = SD->getVariable(); + MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr); + .addFrameIndex(SD->getFrameIx()) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -696,7 +704,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U, RegState::Debug); } - MIB.addMetadata(MDPtr); + MIB.addMetadata(Var); + MIB.addMetadata(Expr); return &*MIB; } @@ -859,9 +868,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. 
-#ifdef NDEBUG if (II.hasPostISelHook()) -#endif TLI->AdjustInstrPostInstrSelection(MIB, Node); } @@ -1013,11 +1020,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /// at the given position in the given block. InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos) - : MF(mbb->getParent()), - MRI(&MF->getRegInfo()), - TM(&MF->getTarget()), - TII(TM->getInstrInfo()), - TRI(TM->getRegisterInfo()), - TLI(TM->getTargetLowering()), - MBB(mbb), InsertPos(insertpos) { -} + : MF(mbb->getParent()), MRI(&MF->getRegInfo()), + TII(MF->getSubtarget().getInstrInfo()), + TRI(MF->getSubtarget().getRegisterInfo()), + TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), + InsertPos(insertpos) {} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 920dda8820f6..7b86f7dd8de0 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef INSTREMITTER_H -#define INSTREMITTER_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -29,7 +29,6 @@ class SDDbgValue; class InstrEmitter { MachineFunction *MF; MachineRegisterInfo *MRI; - const TargetMachine *TM; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const TargetLowering *TLI; diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 16c5b4ba7768..e5473e35caed 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include 
"llvm/ADT/SmallVector.h" @@ -33,10 +34,13 @@ #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; +#define DEBUG_TYPE "legalizedag" + //===----------------------------------------------------------------------===// -/// SelectionDAGLegalize - This takes an arbitrary SelectionDAG as input and +/// This takes an arbitrary SelectionDAG as input and /// hacks on it until the target machine can handle it. This involves /// eliminating value sizes the machine cannot handle (promoting small sizes to /// large sizes or splitting up large values into small values) as well as @@ -48,16 +52,17 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { +class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - /// LegalizePosition - The iterator for walking through the node list. - SelectionDAG::allnodes_iterator LegalizePosition; + /// \brief The set of nodes which have already been legalized. We hold a + /// reference to it in order to update as necessary on node deletion. + SmallPtrSetImpl<SDNode *> &LegalizedNodes; - /// LegalizedNodes - The set of nodes which have already been legalized. - SmallPtrSet<SDNode *, 16> LegalizedNodes; + /// \brief A set of all the nodes updated during legalization. + SmallSetVector<SDNode *, 16> *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(*DAG.getContext(), VT); @@ -66,20 +71,22 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { // Libcall insertion helpers. 
public: - explicit SelectionDAGLegalize(SelectionDAG &DAG); - - void LegalizeDAG(); + SelectionDAGLegalize(SelectionDAG &DAG, + SmallPtrSetImpl<SDNode *> &LegalizedNodes, + SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr) + : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG), + LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} -private: - /// LegalizeOp - Legalizes the given operation. + /// \brief Legalizes the given operation. void LegalizeOp(SDNode *Node); +private: SDValue OptimizeFloatStore(StoreSDNode *ST); void LegalizeLoadOps(SDNode *Node); void LegalizeStoreOps(SDNode *Node); - /// PerformInsertVectorEltInMemory - Some target cannot handle a variable + /// Some targets cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. @@ -88,7 +95,7 @@ private: SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl); - /// ShuffleWithNarrowerEltType - Return a vector shuffle operation which + /// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> @@ -145,43 +152,55 @@ private: void ExpandNode(SDNode *Node); void PromoteNode(SDNode *Node); - void ForgetNode(SDNode *N) { - LegalizedNodes.erase(N); - if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) - ++LegalizePosition; - } - public: - // DAGUpdateListener implementation. 
- void NodeDeleted(SDNode *N, SDNode *E) override { - ForgetNode(N); - } - void NodeUpdated(SDNode *N) override {} - // Node replacement helpers void ReplacedNode(SDNode *N) { - if (N->use_empty()) { - DAG.RemoveDeadNode(N); - } else { - ForgetNode(N); - } + LegalizedNodes.erase(N); + if (UpdatedNodes) + UpdatedNodes->insert(N); } void ReplaceNode(SDNode *Old, SDNode *New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + assert(Old->getNumValues() == New->getNumValues() && + "Replacing one node with another that produces a different number " + "of values!"); DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i)); + if (UpdatedNodes) + UpdatedNodes->insert(New); ReplacedNode(Old); } void ReplaceNode(SDValue Old, SDValue New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + DAG.ReplaceAllUsesWith(Old, New); + DAG.TransferDbgValues(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); ReplacedNode(Old.getNode()); } void ReplaceNode(SDNode *Old, const SDValue *New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); + DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { + DEBUG(dbgs() << (i == 0 ? " with: " + : " and: "); + New[i]->dump(&DAG)); + DAG.TransferDbgValues(SDValue(Old, i), New[i]); + if (UpdatedNodes) + UpdatedNodes->insert(New[i].getNode()); + } ReplacedNode(Old); } }; } -/// ShuffleWithNarrowerEltType - Return a vector shuffle operation which +/// Return a vector shuffle operation which /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. 
<v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> @@ -213,41 +232,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); } -SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) - : SelectionDAG::DAGUpdateListener(dag), - TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), - DAG(dag) { -} - -void SelectionDAGLegalize::LegalizeDAG() { - DAG.AssignTopologicalOrder(); - - // Visit all the nodes. We start in topological order, so that we see - // nodes with their original operands intact. Legalization can produce - // new nodes which may themselves need to be legalized. Iterate until all - // nodes have been legalized. - for (;;) { - bool AnyLegalized = false; - for (LegalizePosition = DAG.allnodes_end(); - LegalizePosition != DAG.allnodes_begin(); ) { - --LegalizePosition; - - SDNode *N = LegalizePosition; - if (LegalizedNodes.insert(N)) { - AnyLegalized = true; - LegalizeOp(N); - } - } - if (!AnyLegalized) - break; - - } - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - -/// ExpandConstantFP - Expands the ConstantFP node to an integer constant or +/// Expands the ConstantFP node to an integer constant or /// a load from the constant pool. SDValue SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { @@ -275,7 +260,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. 
- TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && + TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { Type *SType = SVT.getTypeForEVT(*DAG.getContext()); LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); @@ -291,7 +276,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); + VT, false, false, false, Alignment); return Result; } SDValue Result = @@ -301,7 +286,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { return Result; } -/// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. +/// Expands an unaligned store to 2 half-size stores. static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, SelectionDAGLegalize *DAGLegalize) { @@ -377,7 +362,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load from the stack slot. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, MachinePointerInfo(), - MemVT, false, false, 0); + MemVT, false, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo() @@ -385,7 +370,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, MemVT, ST->isVolatile(), ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset), - ST->getTBAAInfo())); + ST->getAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. 
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); @@ -417,14 +402,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), - Alignment, ST->getTBAAInfo()); + Alignment, ST->getAAInfo()); SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } -/// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. +/// Expands an unaligned load to 2 half-size loads. static void ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, const TargetLowering &TLI, @@ -476,7 +461,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), MinAlign(LD->getAlignment(), Offset), - LD->getTBAAInfo()); + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, MachinePointerInfo(), false, false, 0)); @@ -494,8 +479,9 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, LD->getPointerInfo().getWithOffset(Offset), MemVT, LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), MinAlign(LD->getAlignment(), Offset), - LD->getTBAAInfo()); + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -508,7 +494,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Finally, perform the original load only redirected to the stack slot. 
Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, - MachinePointerInfo(), LoadedVT, false, false, 0); + MachinePointerInfo(), LoadedVT, false,false, false, + 0); // Callers expect a MERGE_VALUES node. ValResult = Load; @@ -538,25 +525,27 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (TLI.isLittleEndian()) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), Alignment, + LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), - LD->getTBAAInfo()); + LD->isNonTemporal(),LD->isInvariant(), + MinAlign(Alignment, IncrementSize), LD->getAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), Alignment, LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), Alignment, + LD->getAAInfo()); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize), - LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), + MinAlign(Alignment, IncrementSize), LD->getAAInfo()); } // aggregate the two parts @@ -572,8 +561,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, ChainResult = TF; } -/// PerformInsertVectorEltInMemory - Some target cannot handle a variable -/// insertion index for the INSERT_VECTOR_ELT instruction. 
In this case, it +/// Some target cannot handle a variable insertion index for the +/// INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform /// the insert there, and then read the result back. SDValue SelectionDAGLegalize:: @@ -659,7 +648,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { if (CFP->getValueType(0) == MVT::f32 && @@ -668,7 +657,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { bitcastToAPInt().zextOrTrunc(32), MVT::i32); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -677,7 +666,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -690,13 +679,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(4, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), isVolatile, isNonTemporal, MinAlign(Alignment, 4U), - TBAAInfo); + AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -714,7 +703,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { @@ -731,12 +720,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // If this is an unaligned store and the target doesn't support it, // expand it. 
unsigned AS = ST->getAddressSpace(); - if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { + unsigned Align = ST->getAlignment(); + if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast<StoreSDNode>(Node), - DAG, TLI, this); + if (Align < ABIAlignment) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; } @@ -754,7 +743,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -776,8 +765,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + NVT, isVolatile, isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -799,7 +787,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -811,7 +799,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); } else { // Big endian - avoid unaligned stores. 
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -821,7 +809,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { TLI.getShiftAmountTy(Value.getValueType()))); Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment, - TBAAInfo); + AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -830,7 +818,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); } // The order of the stores doesn't matter. @@ -842,12 +830,13 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); // If this is an unaligned store and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT(), AS)) { + if (!TLI.allowsMisalignedMemoryAccesses(ST->getMemoryVT(), AS, Align)) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getDataLayout()->getABITypeAlignment(Ty); - if (ST->getAlignment() < ABIAlignment) + if (Align < ABIAlignment) ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); } break; @@ -868,7 +857,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -893,13 +882,14 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: { unsigned AS = LD->getAddressSpace(); + unsigned Align = LD->getAlignment(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT(), AS)) { + if (!TLI.allowsMisalignedMemoryAccesses(LD->getMemoryVT(), AS, Align)) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ + if (Align < ABIAlignment){ ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); } } @@ -928,6 +918,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(RVal.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + if (UpdatedNodes) { + UpdatedNodes->insert(RVal.getNode()); + UpdatedNodes->insert(RChain.getNode()); + } ReplacedNode(Node); } return; @@ -938,7 +932,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); if (SrcWidth != SrcVT.getStoreSizeInBits() && // Some targets pretend to have an i1 loading operation, and actually @@ -949,7 +944,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // nice to have an effective generic way of getting these benefits... // Until such a way is found, don't insist on promoting i1 here. (SrcVT != MVT::i1 || - TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) { + TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) == + TargetLowering::Promote)) { // Promote to a byte-sized load if not loading an integral number of // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. 
unsigned NewWidth = SrcVT.getStoreSizeInBits(); @@ -965,7 +961,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + NVT, isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); Ch = Result.getValue(1); // The chain. @@ -1002,7 +999,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -1010,8 +1007,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + ExtraVT, isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1031,7 +1028,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo(), RoundVT, isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, AAInfo); // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; @@ -1040,8 +1037,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + ExtraVT, isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -1060,7 +1057,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Ch; } else { bool isCustom = false; - switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { + switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0), + SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; @@ -1080,37 +1078,37 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // it, expand it. EVT MemVT = LD->getMemoryVT(); unsigned AS = LD->getAddressSpace(); - if (!TLI.allowsUnalignedMemoryAccesses(MemVT, AS)) { - Type *Ty = - LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = - TLI.getDataLayout()->getABITypeAlignment(Ty); - if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast<LoadSDNode>(Node), - DAG, TLI, Value, Chain); + unsigned Align = LD->getAlignment(); + if (!TLI.allowsMisalignedMemoryAccesses(MemVT, AS, Align)) { + Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = TLI.getDataLayout()->getABITypeAlignment(Ty); + if (Align < ABIAlignment){ + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); } } } break; } case TargetLowering::Expand: - if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && - TLI.isTypeLegal(SrcVT)) { - SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr, - LD->getMemOperand()); - unsigned ExtendOp; - switch (ExtType) { - case ISD::EXTLOAD: - ExtendOp = 
(SrcVT.isFloatingPoint() ? - ISD::FP_EXTEND : ISD::ANY_EXTEND); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, Node->getValueType(0), SrcVT)) { + // If the source type is not legal, see if there is a legal extload to + // an intermediate type that we can then extend further. + EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); + if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT? + TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) { + // If we are loading a legal type, this is a non-extload followed by a + // full extend. + ISD::LoadExtType MidExtType = + (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType; + + SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, + SrcVT, LD->getMemOperand()); + unsigned ExtendOp = + ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType); + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); break; - case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break; - case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; - default: llvm_unreachable("Unexpected extend load type!"); } - Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Chain = Load.getValue(1); - break; } assert(!SrcVT.isVector() && @@ -1134,8 +1132,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); else - ValRes = DAG.getZeroExtendInReg(Result, dl, - SrcVT.getScalarType()); + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); Value = ValRes; Chain = Result.getValue(1); break; @@ -1148,13 +1145,18 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { assert(Value.getNode() != Node && "Load must be completely replaced"); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Value.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } ReplacedNode(Node); } } -/// LegalizeOp - Return a legal replacement for the given 
operation, with -/// all legal operands. +/// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. return; @@ -1335,10 +1337,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } if (NewNode != Node) { - DAG.ReplaceAllUsesWith(Node, NewNode); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); - ReplacedNode(Node); + ReplaceNode(Node, NewNode); Node = NewNode; } switch (Action) { @@ -1349,19 +1348,19 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // a complete mess. SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res.getNode()) { - SmallVector<SDValue, 8> ResultVals; - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { - if (e == 1) - ResultVals.push_back(Res); - else - ResultVals.push_back(Res.getValue(i)); - } - if (Res.getNode() != Node || Res.getResNo() != 0) { - DAG.ReplaceAllUsesWith(Node, ResultVals.data()); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); - ReplacedNode(Node); + if (!(Res.getNode() != Node || Res.getResNo() != 0)) + return; + + if (Node->getNumValues() == 1) { + // We can just directly replace this node with the lowered value. 
+ ReplaceNode(SDValue(Node, 0), Res); + return; } + + SmallVector<SDValue, 8> ResultVals; + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Res.getValue(i)); + ReplaceNode(Node, ResultVals.data()); return; } } @@ -1449,7 +1448,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType(), - false, false, 0); + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { @@ -1484,7 +1483,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { StackPtr); // Store the subvector. - Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr, + Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo(), false, false, 0); // Finally, load the updated vector. @@ -1624,7 +1623,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, @@ -1639,8 +1639,8 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, Results.push_back(Tmp2); } -/// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and -/// condition code CC on the current target. +/// Legalize a SETCC with given LHS and RHS and condition code CC on the current +/// target. /// /// If the SETCC has been legalized using AND / OR, then the legalized node /// will be stored in LHS. RHS and CC will be set to SDValue(). 
NeedInvert @@ -1754,7 +1754,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, return false; } -/// EmitStackConvert - Emit a store/load combination to the stack. This stores +/// Emit a store/load combination to the stack. This stores /// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does /// a load from the stack slot to DestVT, extending it if needed. /// The resultant code need not be legal. @@ -1798,7 +1798,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, - PtrInfo, SlotVT, false, false, DestAlign); + PtrInfo, SlotVT, false, false, false, DestAlign); } SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { @@ -1878,7 +1878,8 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, ShuffleVec.data()); else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) return false; - NewIntermedVals.push_back(std::make_pair(Shuffle, FinalIndices)); + NewIntermedVals.push_back( + std::make_pair(Shuffle, std::move(FinalIndices))); } // If we had an odd number of defined values, then append the last @@ -1913,7 +1914,7 @@ ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, return true; } -/// ExpandBUILD_VECTOR - Expand a BUILD_VECTOR node on targets that don't +/// Expand a BUILD_VECTOR node on targets that don't /// support the operation, but do support the resultant vector type. SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned NumElems = Node->getNumOperands(); @@ -2025,7 +2026,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } -// ExpandLibCall - Expand a node into a call to a libcall. If the result value +// Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. 
If it does fit into a single register, return the result // and leave the Hi part unset. @@ -2073,7 +2074,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } -/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// Generate a libcall taking the given operands as arguments /// and returning a result of type RetVT. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, unsigned NumOps, @@ -2104,7 +2105,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, return CallInfo.first; } -// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// Expand a node into a call to a libcall. Similar to // ExpandLibCall except that the first operand is the in-chain. std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, @@ -2174,7 +2175,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } -/// isDivRemLibcallAvailable - Return true if divmod libcall is available. +/// Return true if divmod libcall is available. static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, const TargetLowering &TLI) { RTLIB::Libcall LC; @@ -2190,8 +2191,7 @@ static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, return TLI.getLibcallName(LC) != nullptr; } -/// useDivRem - Only issue divrem libcall if both quotient and remainder are -/// needed. +/// Only issue divrem libcall if both quotient and remainder are needed. static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { // The other use might have been replaced with a divrem already. unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; @@ -2216,8 +2216,7 @@ static bool useDivRem(SDNode *Node, bool isSigned, bool isDIV) { return false; } -/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem -/// pairs. 
+/// Issue libcalls to __{u}divmod to compute div / rem pairs. void SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { @@ -2279,7 +2278,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Results.push_back(Rem); } -/// isSinCosLibcallAvailable - Return true if sincos libcall is available. +/// Return true if sincos libcall is available. static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { @@ -2293,8 +2292,8 @@ static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { return TLI.getLibcallName(LC) != nullptr; } -/// canCombineSinCosLibcall - Return true if sincos libcall is available and -/// can be used to combine sin and cos. +/// Return true if sincos libcall is available and can be used to combine sin +/// and cos. static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, const TargetMachine &TM) { if (!isSinCosLibcallAvailable(Node, TLI)) @@ -2307,8 +2306,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, return true; } -/// useSinCos - Only issue sincos libcall if both sin and cos are -/// needed. +/// Only issue sincos libcall if both sin and cos are needed. static bool useSinCos(SDNode *Node) { unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN ? ISD::FCOS : ISD::FSIN; @@ -2326,8 +2324,7 @@ static bool useSinCos(SDNode *Node) { return false; } -/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos -/// pairs. +/// Issue libcalls to sincos to compute sin / cos pairs. 
void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results) { @@ -2392,7 +2389,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, MachinePointerInfo(), false, false, false, 0)); } -/// ExpandLegalINT_TO_FP - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are /// legal for the target. @@ -2581,7 +2578,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment); + MVT::f32, false, false, false, Alignment); HandleSDNode Handle(Load); LegalizeOp(Load.getNode()); FudgeInReg = Handle.getValue(); @@ -2590,7 +2587,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); } -/// PromoteLegalINT_TO_FP - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// *INT_TO_FP operation of the specified operand when the target requests that /// we promote it. At this point, we know that the result and operand types are /// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP @@ -2632,7 +2629,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, dl, NewInTy, LegalOp)); } -/// PromoteLegalFP_TO_INT - This function is responsible for legalizing a +/// This function is responsible for legalizing a /// FP_TO_*INT operation of the specified operand when the target requests that /// we promote it. 
At this point, we know that the result and operand types are /// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT @@ -2676,8 +2673,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); } -/// ExpandBSWAP - Open code the operations for BSWAP of the specified operation. -/// +/// Open code the operations for BSWAP of the specified operation. SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { EVT VT = Op.getValueType(); EVT SHVT = TLI.getShiftAmountTy(VT); @@ -2723,8 +2719,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { } } -/// ExpandBitCount - Expand the specified bitcount instruction into operations. -/// +/// Expand the specified bitcount instruction into operations. SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl) { switch (Opc) { @@ -2783,7 +2778,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // x = x | (x >>32); // for 64-bit input // return popcount(~x); // - // but see also: http://www.hackersdelight.org/HDcode/nlz.cc + // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); EVT ShVT = TLI.getShiftAmountTy(VT); unsigned len = VT.getSizeInBits(); @@ -2802,7 +2797,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } - // see also http://www.hackersdelight.org/HDcode/ntz.cc + // Ref: "Hacker's Delight" by Henry Warren EVT VT = Op.getValueType(); SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT), @@ -3396,6 +3391,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } + case ISD::FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + 
RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; case ISD::FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, @@ -3514,6 +3519,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128)); break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; case ISD::FP16_TO_FP: { if (Node->getValueType(0) == MVT::f32) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); @@ -3546,12 +3561,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::FSUB: { EVT VT = Node->getValueType(0); - assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) && - TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && - "Don't know how to expand this FP subtraction!"); - Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); - Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); - Results.push_back(Tmp1); + if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1); + Results.push_back(Tmp1); + } else { + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + } break; } case ISD::SUB: { @@ -3806,9 +3825,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); // Ret is a node with an illegal type. 
Because such things are not - // generally permitted during this phase of legalization, delete the - // node. The above EXTRACT_ELEMENT nodes should have been folded. - DAG.DeleteNode(Ret.getNode()); + // generally permitted during this phase of legalization, make sure the + // node has no more uses. The above EXTRACT_ELEMENT nodes should have been + // folded. + assert(Ret->use_empty() && + "Unexpected uses of illegally type from expanded lib call."); } if (isSigned) { @@ -3869,7 +3890,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, MachinePointerInfo::getJumpTable(), MemVT, - false, false, 0); + false, false, false, 0); Addr = LD; if (TM.getRelocationModel() == Reloc::PIC_) { // For PIC, the sequence is: @@ -4179,6 +4200,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // use the new one. DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Tmp2.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } ReplacedNode(Node); break; } @@ -4255,6 +4280,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1, Tmp2, Node->getOperand(2))); break; } + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: case ISD::FDIV: case ISD::FREM: case ISD::FPOW: { @@ -4282,10 +4310,57 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ReplaceNode(Node, Results.data()); } -// SelectionDAG::Legalize - This is the entry point for the file. -// +/// This is the entry point for the file. void SelectionDAG::Legalize() { - /// run - This is the main entry point to this class. - /// - SelectionDAGLegalize(*this).LegalizeDAG(); + AssignTopologicalOrder(); + + SmallPtrSet<SDNode *, 16> LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes); + + // Visit all the nodes. 
We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (auto NI = allnodes_end(); NI != allnodes_begin();) { + --NI; + + SDNode *N = NI; + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + continue; + } + + if (LegalizedNodes.insert(N).second) { + AnyLegalized = true; + Legalizer.LegalizeOp(N); + + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + } + } + } + if (!AnyLegalized) + break; + + } + + // Remove dead nodes now. + RemoveDeadNodes(); +} + +bool SelectionDAG::LegalizeOp(SDNode *N, + SmallSetVector<SDNode *, 16> &UpdatedNodes) { + SmallPtrSet<SDNode *, 16> LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes); + + // Directly insert the node in question, and legalize it. This will recurse + // as needed through operands. 
+ LegalizedNodes.insert(N); + Legalizer.LegalizeOp(N); + + return LegalizedNodes.count(N); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 649dd7a349ff..4591e79316d8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -68,6 +68,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; @@ -153,6 +155,32 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + NVT, Ops, 2, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + NVT, Ops, 2, false, SDLoc(N)).first; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), 
N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), @@ -377,10 +405,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's // entirely possible for both f16 and f32 to be legal, so use the fully // hard-float FP_EXTEND rather than FP16_TO_FP. - if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) + SoftenFloatResult(Op.getNode(), 0); + } RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) + Op = GetSoftenedFloat(Op); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } @@ -543,7 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), NVT, L->isVolatile(), L->isNonTemporal(), false, L->getAlignment(), - L->getTBAAInfo()); + L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -556,7 +589,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), L->isNonTemporal(), false, L->getAlignment(), - L->getTBAAInfo()); + L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -851,6 +884,8 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; + case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; @@ -914,6 +949,26 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, ISD::SETEQ); } +void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 44d9e3875b83..82b114b80aa9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -66,6 +66,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N));break; case 
ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; @@ -342,9 +343,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { EVT NVT = Op.getValueType(); Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. - return DAG.getNode(ISD::SUB, dl, NVT, Op, - DAG.getConstant(NVT.getSizeInBits() - - OVT.getSizeInBits(), NVT)); + return DAG.getNode( + ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), + NVT)); } SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { @@ -362,8 +364,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. - APInt TopBit(NVT.getSizeInBits(), 0); - TopBit.setBit(OVT.getSizeInBits()); + auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), + OVT.getScalarSizeInBits()); Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT)); } return DAG.getNode(N->getOpcode(), dl, NVT, Op); @@ -453,6 +455,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); + SDValue ExtMask = PromoteTargetBoolean(N->getMask(), NVT); + SDLoc dl(N); + + MachineMemOperand *MMO = DAG.getMachineFunction(). 
+ getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOLoad, NVT.getStoreSize(), + N->getAlignment(), N->getAAInfo(), N->getRanges()); + + SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), + ExtMask, ExtSrc0, MMO); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} /// Promote the overflow flag of an overflowing arithmetic node. SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { // Simply change the return type of the boolean result. @@ -824,6 +844,10 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N), + OpNo); break; + case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), + OpNo); break; case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; case ISD::FP16_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; @@ -861,7 +885,26 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, switch (CCCode) { default: llvm_unreachable("Unknown integer comparison!"); case ISD::SETEQ: - case ISD::SETNE: + case ISD::SETNE: { + SDValue OpL = GetPromotedInteger(NewLHS); + SDValue OpR = GetPromotedInteger(NewRHS); + + // We would prefer to promote the comparison operand with sign extension, + // if we find the operand is actually to truncate an AssertSext. With this + // optimization, we can avoid inserting real truncate instruction, which + // is redudant eventually. 
+ if (OpL->getOpcode() == ISD::AssertSext && + cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() && + OpR->getOpcode() == ISD::AssertSext && + cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) { + NewLHS = OpL; + NewRHS = OpR; + } else { + NewLHS = ZExtPromotedInteger(NewLHS); + NewRHS = ZExtPromotedInteger(NewRHS); + } + break; + } case ISD::SETUGE: case ISD::SETUGT: case ISD::SETULE: @@ -945,7 +988,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && - "Legal vector of one illegal element?"); + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be @@ -1071,6 +1114,63 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getMemoryVT(), N->getMemOperand()); } +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo){ + + assert(OpNo == 2 && "Only know how to promote the mask!"); + SDValue DataOp = N->getData(); + EVT DataVT = DataOp.getValueType(); + SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); + SDLoc dl(N); + + if (!TLI.isTypeLegal(DataVT)) { + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) { + DataOp = GetPromotedInteger(DataOp); + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + } + else { + assert(getTypeAction(DataVT) == TargetLowering::TypeWidenVector && + "Unexpected data legalization in MSTORE"); + DataOp = GetWidenedVector(DataOp); + + if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + EVT BoolVT = getSetCCResultType(DataOp.getValueType()); + + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned 
MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + } + } + else + Mask = PromoteTargetBoolean(N->getMask(), DataOp.getValueType()); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[2] = Mask; + NewOps[3] = DataOp; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo){ + assert(OpNo == 2 && "Only know how to promote the mask!"); + EVT DataVT = N->getValueType(0); + SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); @@ -1859,7 +1959,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); bool isInvariant = N->isInvariant(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1868,7 +1968,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + MemVT, isVolatile, isNonTemporal, isInvariant, + Alignment, AAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -1891,7 +1992,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Little-endian - low bits are at low addresses. 
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1903,8 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1922,7 +2023,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -1931,8 +2033,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. 
@@ -2709,7 +2811,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); SDValue Lo, Hi; @@ -2719,7 +2821,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment, TBAAInfo); + Alignment, AAInfo); } if (TLI.isLittleEndian()) { @@ -2727,7 +2829,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2740,7 +2842,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2768,7 +2870,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + HiVT, isVolatile, isNonTemporal, Alignment, AAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -2778,7 +2880,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2853,7 +2955,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { FudgePtr, MachinePointerInfo::getConstantPool(), MVT::f32, - false, false, Alignment); + false, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index bd7dacf2bc69..ebf6b28259ea 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -921,6 +921,17 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // The target didn't want to custom lower it after all. return false; + // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to + // provide the same kind of custom splitting behavior. + if (Results.size() == N->getNumValues() + 1 && LegalizeResult) { + // We've legalized a return type by splitting it. If there is a chain, + // replace that too. + SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]); + if (N->getNumValues() > 1) + ReplaceValueWith(SDValue(N, 1), Results[2]); + return true; + } + // Make everything that once used N's values now use those in Results instead. 
assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 117ff31e2e8b..1cd9f407bca1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -13,8 +13,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SELECTIONDAG_LEGALIZETYPES_H -#define SELECTIONDAG_LEGALIZETYPES_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" @@ -122,8 +122,8 @@ public: explicit DAGTypeLegalizer(SelectionDAG &dag) : TLI(dag.getTargetLoweringInfo()), DAG(dag), ValueTypeActions(TLI.getValueTypeActions()) { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); + static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, + "Too many value types for ValueTypeActions to hold!"); } /// run - This is the main entry point for the type legalizer. 
This does a @@ -240,6 +240,7 @@ private: SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_SDIV(SDNode *N); @@ -285,6 +286,8 @@ private: SDValue PromoteIntOp_TRUNCATE(SDNode *N); SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -387,6 +390,8 @@ private: SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FMINNUM(SDNode *N); + SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); @@ -450,6 +455,8 @@ private: void ExpandFloatResult(SDNode *N, unsigned ResNo); void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -574,6 +581,7 @@ private: void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); void 
SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -590,6 +598,7 @@ private: SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_TRUNCATE(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); @@ -623,6 +632,7 @@ private: SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); + SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N); SDValue WidenVecRes_SELECT(SDNode* N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 7e2f7b6ffb55..38829b6ac076 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -256,13 +256,13 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); // Increment the pointer to the other half. 
unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -271,7 +271,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -470,7 +470,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); - const MDNode *TBAAInfo = St->getTBAAInfo(); + AAMDNodes AAInfo = St->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -482,14 +482,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize), TBAAInfo); + MinAlign(Alignment, IncrementSize), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 507e7ffb1d45..eac404c50365 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -199,12 +199,30 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); - if 
(LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { - if (TLI.isLoadExtLegal(LD->getExtensionType(), LD->getMemoryVT())) + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) + switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), + LD->getMemoryVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); - Changed = true; - return LegalizeOp(ExpandLoad(Op)); - } + case TargetLowering::Custom: + if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + Changed = true; + if (Lowered->getNumValues() != Op->getNumValues()) { + // This expanded to something other than the load. Assume the + // lowering code took care of any chain values, and just handle the + // returned value. + assert(Result.getValue(1).use_empty() && + "There are still live users of the old chain!"); + return LegalizeOp(Lowered); + } else { + return TranslateLegalizeResults(Op, Lowered); + } + } + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } } else if (Op.getOpcode() == ISD::STORE) { StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); EVT StVT = ST->getMemoryVT(); @@ -273,6 +291,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FMINNUM: + case ISD::FMAXNUM: case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: @@ -353,9 +373,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) { return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); } - // The rest of the time, vector "promotion" is basically just bitcasting and - // doing the operation in a different type. For example, x86 promotes - // ISD::AND on v2i32 to v1i64. + // There are currently two cases of vector promotion: + // 1) Bitcasting a vector of integers to a different type to a vector of the + // same overall length. For example, x86 promotes ISD::AND on v2i32 to v1i64. 
+ // 2) Extending a vector of floats to a vector of the same number of larger + // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); @@ -365,14 +387,23 @@ SDValue VectorLegalizer::Promote(SDValue Op) { for (unsigned j = 0; j != Op.getNumOperands(); ++j) { if (Op.getOperand(j).getValueType().isVector()) - Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + if (Op.getOperand(j) + .getValueType() + .getVectorElementType() + .isFloatingPoint()) + Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); + else + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); else Operands[j] = Op.getOperand(j); } Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands); - - return DAG.getNode(ISD::BITCAST, dl, VT, Op); + if (VT.isFloatingPoint() || + (VT.isVector() && VT.getVectorElementType().isFloatingPoint())) + return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0)); + else + return DAG.getNode(ISD::BITCAST, dl, VT, Op); } SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { @@ -480,7 +511,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment(), - LD->getTBAAInfo()); + LD->getAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { @@ -490,8 +521,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment(), - LD->getTBAAInfo()); + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment(), LD->getAAInfo()); } RemainingBytes -= LoadBytes; @@ -561,8 +592,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Op.getNode()->getValueType(0).getScalarType(), Chain, 
BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment(), LD->getTBAAInfo()); + LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment(), LD->getAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, DAG.getConstant(Stride, BasePTR.getValueType())); @@ -593,7 +624,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); unsigned NumElem = StVT.getVectorNumElements(); // The type of the data we want to save @@ -621,7 +652,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, DAG.getConstant(Stride, BasePTR.getValueType())); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index f77c592fddb7..96b69eec3354 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -69,8 +69,10 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ANY_EXTEND: case ISD::BSWAP: case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: @@ -104,6 +106,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FPOW: case ISD::FREM: case 
ISD::FSUB: @@ -221,7 +226,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), N->isInvariant(), N->getOriginalAlignment(), - N->getTBAAInfo()); + N->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -232,7 +237,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. EVT DestVT = N->getValueType(0).getVectorElementType(); - SDValue Op = GetScalarizedVector(N->getOperand(0)); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + // This is a workaround for targets where it's impossible to scalarize the + // result of a conversion, because the source type is legal. + // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} + // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is + // legal and was not scalarized. 
+ // See the similar logic in ScalarizeVecRes_VSETCC + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getConstant(0, TLI.getVectorIdxTy())); + } return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } @@ -406,6 +427,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::TRUNCATE: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::CONCAT_VECTORS: @@ -449,11 +474,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { N->getValueType(0), Elt); } -/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs -/// to be scalarized, it must be <1 x ty>. Extend the element instead. +/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be +/// scalarized, it must be <1 x ty>. Do the operation on the element instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && - "Unexected vector type!"); + "Unexpected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0).getScalarType(), Elt); @@ -507,12 +532,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getAlignment(), N->getTBAAInfo()); + N->getAlignment(), N->getAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment(), N->getTBAAInfo()); + N->getOriginalAlignment(), N->getAAInfo()); } /// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs @@ -572,6 +597,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MLOAD: + SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); + break; case ISD::SETCC: SplitVecRes_SETCC(N, Lo, Hi); break; @@ -625,6 +653,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: case ISD::SDIV: case ISD::UDIV: case ISD::FDIV: @@ -866,6 +896,10 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, return; } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return; + // Spill the vector to the stack. 
EVT VecVT = Vec.getValueType(); EVT EltVT = VecVT.getVectorElementType(); @@ -921,14 +955,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment, TBAAInfo); + isInvariant, Alignment, AAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, @@ -936,7 +970,7 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo); + AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -948,6 +982,64 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MLD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Ch = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + SDValue Mask = MLD->getMask(); + unsigned Alignment = MLD->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 
+ Alignment/2 : Alignment; + + SDValue MaskLo, MaskHi; + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MLD->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, MMO); + + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(MLD, 1), Ch); + +} + void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -1203,6 +1295,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::MSTORE: + Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); + break; case ISD::VSELECT: Res = SplitVecOp_VSELECT(N, OpNo); break; @@ -1347,6 +1442,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { Idx.getValueType())), 0); } + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return SDValue(); + // Store the vector to the stack. EVT EltVT = VecVT.getVectorElementType(); SDLoc dl(N); @@ -1357,7 +1456,57 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo(), EltVT, false, false, 0); + MachinePointerInfo(), EltVT, false, false, false, 0); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Data = N->getData(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + GetSplitVector(Data, DataLo, DataHi); + SDValue MaskLo, MaskHi; + GetSplitVector(Mask, MaskLo, MaskHi); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 
+ Alignment/2 : Alignment; + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, MMO); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, N->getAAInfo(), N->getRanges()); + + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, MMO); + + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -1372,7 +1521,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); bool isNT = N->isNonTemporal(); - const MDNode *TBAAInfo = N->getTBAAInfo(); + AAMDNodes AAInfo = N->getAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); @@ -1383,10 +1532,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment, TBAAInfo); + LoMemVT, isVol, isNT, Alignment, AAInfo); else Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment, TBAAInfo); + isVol, isNT, Alignment, AAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, @@ -1395,11 +1544,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment, TBAAInfo); + HiMemVT, isVol, isNT, Alignment, AAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment, TBAAInfo); + isVol, isNT, Alignment, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -1564,6 +1713,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); break; + case ISD::MLOAD: + Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; case ISD::ADD: case ISD::AND: @@ -1573,6 +1725,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::OR: case ISD::SUB: case ISD::XOR: + case ISD::FMINNUM: + case ISD::FMAXNUM: Res = WidenVecRes_Binary(N); break; @@ -2252,6 +2406,48 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { return Result; } +SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); + SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue Src0 = GetWidenedVector(N->getSrc0()); + SDLoc dl(N); + + if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + EVT BoolVT = getSetCCResultType(WidenVT); + + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, MaskVT); + Ops[0] = Mask; + for (unsigned 
i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + + // Rebuild memory operand because MemoryVT was changed + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOLoad, WidenVT.getStoreSize(), + N->getAlignment(), N->getAAInfo(), N->getRanges()); + + SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), + Mask, Src0, MMO); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), @@ -2735,7 +2931,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2746,7 +2942,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), isVolatile, isNonTemporal, isInvariant, Align, - TBAAInfo); + AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2791,7 +2987,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment), - TBAAInfo); + AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector<SDValue, 16> 
Loads; @@ -2807,7 +3003,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment), - TBAAInfo); + AAInfo); LdChain.push_back(L.getValue(1)); } @@ -2887,7 +3083,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); - const MDNode *TBAAInfo = LD->getTBAAInfo(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2899,7 +3096,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo); + LdEltVT, isVolatile, isNonTemporal, isInvariant, + Align, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { @@ -2909,7 +3107,8 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, Align, TBAAInfo); + isVolatile, isNonTemporal, isInvariant, Align, + AAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -2932,7 +3131,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -2959,7 
+3158,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; @@ -2979,7 +3178,7 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, @@ -3001,7 +3200,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); - const MDNode *TBAAInfo = ST->getTBAAInfo(); + AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); SDLoc dl(ST); @@ -3025,7 +3224,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, isVolatile, isNonTemporal, Align, - TBAAInfo)); + AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), @@ -3036,7 +3235,7 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset), TBAAInfo)); + MinAlign(Align, Offset), AAInfo)); } } diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 
624003f5070e..db38b76cf93a 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -40,32 +41,29 @@ static cl::opt<signed> RegPressureThreshold( "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), cl::desc("Track reg pressure and switch priority to in-depth")); +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) + : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + TRI = STI.getRegisterInfo(); + TLI = IS->TLI; + TII = STI.getInstrInfo(); + ResourcesModel = TII->CreateTargetScheduleState(STI); + // This hard requirement could be relaxed, but for now + // do not let it procede. + assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); + I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); -ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : - Picker(this), - InstrItins(IS->getTargetLowering()->getTargetMachine().getInstrItineraryData()) -{ - TII = IS->getTargetLowering()->getTargetMachine().getInstrInfo(); - TRI = IS->getTargetLowering()->getTargetMachine().getRegisterInfo(); - TLI = IS->getTargetLowering(); - - const TargetMachine &tm = (*IS->MF).getTarget(); - ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,nullptr); - // This hard requirement could be relaxed, but for now - // do not let it procede. 
- assert (ResourcesModel && "Unimplemented CreateTargetScheduleState."); - - unsigned NumRC = TRI->getNumRegClasses(); - RegLimit.resize(NumRC); - RegPressure.resize(NumRC); - std::fill(RegLimit.begin(), RegLimit.end(), 0); - std::fill(RegPressure.begin(), RegPressure.end(), 0); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); - - ParallelLiveRanges = 0; - HorizontalVerticalBalance = 0; + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; } unsigned @@ -319,7 +317,7 @@ void ResourcePriorityQueue::reserveResources(SUnit *SU) { // If packet is now full, reset the state so in the next cycle // we start fresh. - if (Packet.size() >= InstrItins->SchedModel->IssueWidth) { + if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { ResourcesModel->clearResources(); Packet.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index ee5429283112..bce69d79ab12 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CODEGEN_SDNODEDBGVALUE_H -#define LLVM_CODEGEN_SDNODEDBGVALUE_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" #include "llvm/IR/DebugLoc.h" @@ -44,7 +44,8 @@ private: const Value *Const; // valid for constants unsigned FrameIx; // valid for stack objects } u; - MDNode *mdPtr; + MDNode *Var; + MDNode *Expr; bool IsIndirect; uint64_t Offset; DebugLoc DL; @@ -52,69 +53,72 @@ private: bool Invalid; public: // Constructor for non-constants. 
- SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, - bool indir, uint64_t off, DebugLoc dl, - unsigned O) : mdPtr(mdP), IsIndirect(indir), - Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, + uint64_t off, DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), IsIndirect(indir), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = SDNODE; u.s.Node = N; u.s.ResNo = R; } // Constructor for constants. - SDDbgValue(MDNode *mdP, const Value *C, uint64_t off, DebugLoc dl, - unsigned O) : - mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, + DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = CONST; u.Const = C; } // Constructor for frame indices. - SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) : - mdPtr(mdP), IsIndirect(false), Offset(off), DL(dl), Order(O), - Invalid(false) { + SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, + unsigned O) + : Var(Var), Expr(Expr), IsIndirect(false), Offset(off), DL(dl), Order(O), + Invalid(false) { kind = FRAMEIX; u.FrameIx = FI; } // Returns the kind. - DbgValueKind getKind() { return kind; } + DbgValueKind getKind() const { return kind; } - // Returns the MDNode pointer. - MDNode *getMDPtr() { return mdPtr; } + // Returns the MDNode pointer for the variable. + MDNode *getVariable() const { return Var; } + + // Returns the MDNode pointer for the expression. 
+ MDNode *getExpression() const { return Expr; } // Returns the SDNode* for a register ref - SDNode *getSDNode() { assert (kind==SDNODE); return u.s.Node; } + SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } // Returns the ResNo for a register ref - unsigned getResNo() { assert (kind==SDNODE); return u.s.ResNo; } + unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } // Returns the Value* for a constant - const Value *getConst() { assert (kind==CONST); return u.Const; } + const Value *getConst() const { assert (kind==CONST); return u.Const; } // Returns the FrameIx for a stack object - unsigned getFrameIx() { assert (kind==FRAMEIX); return u.FrameIx; } + unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } // Returns whether this is an indirect value. - bool isIndirect() { return IsIndirect; } + bool isIndirect() const { return IsIndirect; } // Returns the offset. - uint64_t getOffset() { return Offset; } + uint64_t getOffset() const { return Offset; } // Returns the DebugLoc. - DebugLoc getDebugLoc() { return DL; } + DebugLoc getDebugLoc() const { return DL; } // Returns the SDNodeOrder. This is the order of the preceding node in the // input. - unsigned getOrder() { return Order; } + unsigned getOrder() const { return Order; } // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" // property. A SDDbgValue is invalid if the SDNode that produces the value is // deleted. 
void setIsInvalidated() { Invalid = true; } - bool isInvalidated() { return Invalid; } + bool isInvalidated() const { return Invalid; } }; } // end llvm namespace diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 4d8c2c78bce6..61a3fd728711 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -221,7 +221,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue) return nullptr; else if (VT == MVT::Other) @@ -229,7 +229,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (VT == MVT::Glue) return nullptr; } @@ -431,17 +431,23 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? -static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) - break; - ++NumRes; + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. 
+ NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } } - return N->getValueType(NumRes); + return N->getSimpleValueType(NumRes); } /// CheckForLiveRegDef - Return true and update live register vector if the @@ -454,7 +460,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { - if (RegAdded.insert(*AI)) { + if (RegAdded.insert(*AI).second) { LRegs.push_back(*AI); Added = true; } @@ -572,7 +578,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index dedca41c3aab..8b54e6568b9e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -30,8 +30,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <climits> using namespace llvm; @@ -166,12 +166,11 @@ public: NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), Topo(SUnits, nullptr) { - const TargetMachine &tm = mf.getTarget(); + const TargetSubtargetInfo &STI = 
mf.getSubtarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); else - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( - tm.getSubtargetImpl(), this); + HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } ~ScheduleDAGRRList() { @@ -946,7 +945,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue) return nullptr; else if (VT == MVT::Other) @@ -954,7 +953,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { } for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { const SDValue &Op = N->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (VT == MVT::Glue) return nullptr; } @@ -1189,17 +1188,23 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, /// getPhysicalRegisterVT - Returns the ValueType of the physical register /// definition of the specified node. /// FIXME: Move to SelectionDAG? -static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const TargetInstrInfo *TII) { - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); - unsigned NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) - break; - ++NumRes; + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. 
+ NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } } - return N->getValueType(NumRes); + return N->getSimpleValueType(NumRes); } /// CheckForLiveRegDef - Return true and update live register vector if the @@ -1218,7 +1223,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, if (LiveRegDefs[*AliasI] == SU) continue; // Add Reg to the set of interfering live regs. - if (RegAdded.insert(*AliasI)) { + if (RegAdded.insert(*AliasI).second) { LRegs.push_back(*AliasI); } } @@ -1235,7 +1240,7 @@ static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, if (!LiveRegDefs[i]) continue; if (LiveRegDefs[i] == SU) continue; if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; - if (RegAdded.insert(i)) + if (RegAdded.insert(i).second) LRegs.push_back(i); } } @@ -1310,7 +1315,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { SDNode *Gen = LiveRegGens[CallResource]->getNode(); while (SDNode *Glued = Gen->getGluedNode()) Gen = Glued; - if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) + if (!IsChainDependent(Gen, Node, 0, TII) && + RegAdded.insert(CallResource).second) LRegs.push_back(CallResource); } } @@ -1439,7 +1445,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); @@ -1930,8 +1936,8 @@ void RegReductionPQBase::dumpRegPressure() const { unsigned Id = 
RC->getID(); unsigned RP = RegPressure[Id]; if (!RP) continue; - DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] - << '\n'); + DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " + << RegLimit[Id] << '\n'); } #endif } @@ -2754,7 +2760,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, if (!SUImpDefs && !SURegMask) continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) @@ -2977,9 +2983,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { llvm::ScheduleDAGSDNodes * llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); BURegReductionPriorityQueue *PQ = new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); @@ -2991,9 +2997,9 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); SrcRegReductionPriorityQueue *PQ = new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); @@ -3005,10 +3011,10 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level 
OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); @@ -3021,10 +3027,10 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, llvm::ScheduleDAGSDNodes * llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index de910b7c861b..f2b18fc84bf5 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -29,7 +29,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -38,17 +37,17 @@ using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); -// This allows latency based scheduler to notice high latency instructions -// without a target itinerary. 
The choise if number here has more to do with -// balancing scheduler heursitics than with the actual machine latency. +// This allows the latency-based scheduler to notice high latency instructions +// without a target itinerary. The choice of number here has more to do with +// balancing scheduler heuristics than with the actual machine latency. static cl::opt<int> HighLatencyCycles( "sched-high-latency-cycles", cl::Hidden, cl::init(10), cl::desc("Roughly estimate the number of cycles that 'long latency'" "instructions take for targets with no itinerary")); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), - InstrItins(mf.getTarget().getInstrItineraryData()) {} + : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), + InstrItins(mf.getSubtarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// @@ -120,15 +119,20 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, return; unsigned ResNo = User->getOperand(2).getResNo(); - if (Def->isMachineOpcode()) { + if (Def->getOpcode() == ISD::CopyFromReg && + cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) { + PhysReg = Reg; + } else if (Def->isMachineOpcode()) { const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && - II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) PhysReg = Reg; - const TargetRegisterClass *RC = - TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); - Cost = RC->getCopyCost(); - } + } + + if (PhysReg != 0) { + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo)); + Cost = RC->getCopyCost(); } } @@ -136,7 +140,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, SmallVectorImpl<EVT> &VTs, SDValue ExtraOper = SDValue()) { - SmallVector<SDValue, 4> Ops; + SmallVector<SDValue, 
8> Ops; for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) Ops.push_back(N->getOperand(I)); @@ -226,7 +230,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); I != E && UseCount < 100; ++I, ++UseCount) { SDNode *User = *I; - if (User == Node || !Visited.insert(User)) + if (User == Node || !Visited.insert(User).second) continue; int64_t Offset1, Offset2; if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || @@ -339,7 +343,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Add all operands to the worklist unless they've already been added. for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i) - if (Visited.insert(NI->getOperand(i).getNode())) + if (Visited.insert(NI->getOperand(i).getNode()).second) Worklist.push_back(NI->getOperand(i).getNode()); if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. @@ -425,7 +429,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { } void ScheduleDAGSDNodes::AddSchedEdges() { - const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); + const TargetSubtargetInfo &ST = MF.getSubtarget(); // Check to see if the scheduler cares about latencies. bool UnitLatencies = forceUnitLatencies(); @@ -547,6 +551,14 @@ void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { NodeNumDefs = 0; return; } + if (POpc == TargetOpcode::PATCHPOINT && + Node->getValueType(0) == MVT::Other) { + // PATCHPOINT is defined to have one result, but it might really have none + // if we're not using CallingConv::AnyReg. Don't mistake the chain for a + // real definition. + NodeNumDefs = 0; + return; + } unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); // Some instructions define regs that are not represented in the selection DAG // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. 
@@ -733,7 +745,7 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, SmallSet<unsigned, 8> &Seen) { unsigned Order = N->getIROrder(); - if (!Order || !Seen.insert(Order)) { + if (!Order || !Seen.insert(Order).second) { // Process any valid SDDbgValues even if node does not have any order // assigned. ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 39ebadf3011c..2cd1f4b9bd47 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SCHEDULEDAGSDNODES_H -#define SCHEDULEDAGSDNODES_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 4589b0c35dc3..418b58eda394 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <climits> using namespace llvm; @@ -71,10 +72,8 @@ public: AliasAnalysis *aa, SchedulingPriorityQueue *availqueue) : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { - - const TargetMachine &tm = mf.getTarget(); - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( - tm.getSubtargetImpl(), this); + const TargetSubtargetInfo &STI = mf.getSubtarget(); + HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } ~ScheduleDAGVLIW() { diff --git 
a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index a5555364c82a..c819516eca04 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -46,6 +46,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> #include <cmath> @@ -95,7 +96,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, /// BUILD_VECTOR where all of the elements are ~0 or undef. bool ISD::isBuildVectorAllOnes(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BITCAST) + while (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -143,7 +144,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) { /// BUILD_VECTOR where all of the elements are 0 or undef. bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Look through a bit convert. - if (N->getOpcode() == ISD::BITCAST) + while (N->getOpcode() == ISD::BITCAST) N = N->getOperand(0).getNode(); if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -233,10 +234,10 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } -ISD::NodeType ISD::getExtForLoadExtType(ISD::LoadExtType ExtType) { +ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { switch (ExtType) { case ISD::EXTLOAD: - return ISD::ANY_EXTEND; + return IsFP ? 
ISD::FP_EXTEND : ISD::ANY_EXTEND; case ISD::SEXTLOAD: return ISD::SIGN_EXTEND; case ISD::ZEXTLOAD: @@ -686,6 +687,15 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { DeallocateNode(N); } +void SDDbgInfo::erase(const SDNode *Node) { + DbgValMapType::iterator I = DbgValMap.find(Node); + if (I == DbgValMap.end()) + return; + for (auto &Val: I->second) + Val->setIsInvalidated(); + DbgValMap.erase(I); +} + void SelectionDAG::DeallocateNode(SDNode *N) { if (N->OperandsNeedDelete) delete[] N->OperandList; @@ -696,10 +706,60 @@ void SelectionDAG::DeallocateNode(SDNode *N) { NodeAllocator.Deallocate(AllNodes.remove(N)); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. - ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N); - for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) - DbgVals[i]->setIsInvalidated(); + // If any of the SDDbgValue nodes refer to this SDNode, invalidate + // them and forget about that node. + DbgInfo->erase(N); +} + +#ifndef NDEBUG +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. 
+static void VerifySDNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + EVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + assert(I->getValueType() == N->getOperand(0).getValueType() && + "Operands must all have the same type"); + } + break; + } + } +} +#endif // NDEBUG + +/// \brief Insert a newly allocated node into the DAG. +/// +/// Handles insertion into the all nodes list and CSE map, as well as +/// verification and other common operations when a new node is allocated. +void SelectionDAG::InsertNode(SDNode *N) { + AllNodes.push_back(N); +#ifndef NDEBUG + VerifySDNode(N); +#endif } /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that @@ -839,83 +899,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, return Node; } -#ifndef NDEBUG -/// VerifyNodeCommon - Sanity check the given node. 
Aborts if it is invalid. -static void VerifyNodeCommon(SDNode *N) { - switch (N->getOpcode()) { - default: - break; - case ISD::BUILD_PAIR: { - EVT VT = N->getValueType(0); - assert(N->getNumValues() == 1 && "Too many results!"); - assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && - "Wrong return type!"); - assert(N->getNumOperands() == 2 && "Wrong number of operands!"); - assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && - "Mismatched operand types!"); - assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && - "Wrong operand type!"); - assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && - "Wrong return type size"); - break; - } - case ISD::BUILD_VECTOR: { - assert(N->getNumValues() == 1 && "Too many results!"); - assert(N->getValueType(0).isVector() && "Wrong return type!"); - assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && - "Wrong number of operands!"); - EVT EltVT = N->getValueType(0).getVectorElementType(); - for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { - assert((I->getValueType() == EltVT || - (EltVT.isInteger() && I->getValueType().isInteger() && - EltVT.bitsLE(I->getValueType()))) && - "Wrong operand type!"); - assert(I->getValueType() == N->getOperand(0).getValueType() && - "Operands must all have the same type"); - } - break; - } - } -} - -/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. -static void VerifySDNode(SDNode *N) { - // The SDNode allocators cannot be used to allocate nodes with fields that are - // not present in an SDNode! 
- assert(!isa<MemSDNode>(N) && "Bad MemSDNode!"); - assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!"); - assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!"); - assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!"); - assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!"); - assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!"); - assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!"); - assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!"); - assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!"); - assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!"); - assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!"); - assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!"); - assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!"); - assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!"); - assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!"); - assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!"); - assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!"); - assert(!isa<VTSDNode>(N) && "Bad VTSDNode!"); - assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!"); - - VerifyNodeCommon(N); -} - -/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is -/// invalid. -static void VerifyMachineNode(SDNode *N) { - // The MachineNode allocators cannot be used to allocate nodes with fields - // that are not present in a MachineNode! - // Currently there are no such nodes. - - VerifyNodeCommon(N); -} -#endif // NDEBUG - /// getEVTAlignment - Compute the default alignment value for the /// given type. /// @@ -924,22 +907,23 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty); + return TLI->getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... 
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TSI(*tm.getSelectionDAGInfo()), TLI(nullptr), OptLevel(OL), - EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), - Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), - UpdateListeners(nullptr) { + : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(nullptr) { AllNodes.push_back(&EntryNode); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetLowering *tli) { +void SelectionDAG::init(MachineFunction &mf) { MF = &mf; - TLI = tli; + TLI = getSubtarget().getTargetLowering(); + TSI = getSubtarget().getSelectionDAGInfo(); Context = &mf.getFunction()->getContext(); } @@ -1108,8 +1092,6 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, EVT EltVT = VT.getScalarType(); const ConstantInt *Elt = &Val; - const TargetLowering *TLI = TM.getTargetLowering(); - // In some cases the vector type is legal but the element type is illegal and // needs to be promoted, for example v8i8 on ARM. In this case, promote the // inserted value (the type does not need to match the vector element type). 
@@ -1185,7 +1167,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, if (!N) { N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); } SDValue Result(N, 0); @@ -1198,7 +1180,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT, } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget); + return getConstant(Val, TLI->getPointerTy(), isTarget); } @@ -1227,7 +1209,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (!N) { N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); } SDValue Result(N, 0); @@ -1263,7 +1245,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); - const TargetLowering *TLI = TM.getTargetLowering(); // Truncate (with sign-extension) the offset value to the pointer size. 
unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); @@ -1290,7 +1271,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, DL.getDebugLoc(), GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1305,7 +1286,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1325,7 +1306,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1336,8 +1317,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = - TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1352,7 +1332,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1364,8 +1344,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = - TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = TLI->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); @@ -1380,7 +1359,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1398,7 +1377,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, SDNode *N = new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1412,7 +1391,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1426,7 +1405,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { if (N) return SDValue(N, 0); N = new (NodeAllocator) VTSDNode(VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1434,7 +1413,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N 
= ExternalSymbols[Sym]; if (N) return SDValue(N, 0); N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1445,7 +1424,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, TargetFlags)]; if (N) return SDValue(N, 0); N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1456,7 +1435,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { if (!CondCodeNodes[Cond]) { CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; - AllNodes.push_back(N); + InsertNode(N); } return SDValue(CondCodeNodes[Cond], 0); @@ -1594,7 +1573,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, dl.getDebugLoc(), N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1640,7 +1619,7 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, dl.getDebugLoc(), Ops, Code); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1654,7 +1633,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1668,7 +1647,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1684,7 +1663,7 @@ SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), dl.getDebugLoc(), Root, Label); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1707,7 +1686,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress 
*BA, EVT VT, SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, TargetFlags); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1725,7 +1704,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { SDNode *N = new (NodeAllocator) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1741,7 +1720,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1762,7 +1741,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, dl.getDebugLoc(), VT, Ptr, SrcAS, DestAS); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -1770,7 +1749,7 @@ SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy); + EVT ShTy = TLI->getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? 
ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1783,7 +1762,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); - const TargetLowering *TLI = TM.getTargetLowering(); unsigned StackAlign = std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); @@ -1798,7 +1776,6 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const TargetLowering *TLI = TM.getTargetLowering(); const DataLayout *TD = TLI->getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); @@ -1817,7 +1794,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETFALSE2: return getConstant(0, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { - const TargetLowering *TLI = TM.getTargetLowering(); TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(N1->getValueType(0)); return getConstant( @@ -1906,7 +1882,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, // Ensure that the constant occurs on the RHS. ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); MVT CompVT = N1.getValueType().getSimpleVT(); - if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT)) + if (!TLI->isCondCodeLegal(SwappedCond, CompVT)) return SDValue(); return getSetCC(dl, VT, N2, N1, SwappedCond); @@ -1942,7 +1918,6 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// them in the KnownZero/KnownOne bitsets. void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { - const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. 
@@ -2378,7 +2353,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ - const TargetLowering *TLI = TM.getTargetLowering(); EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2676,10 +2650,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { DL.getDebugLoc(), getVTList(VT)); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -2712,6 +2683,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(apf, VT); } case ISD::BITCAST: + if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) + return getConstantFP(APFloat(APFloat::IEEEhalf, Val), VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) @@ -2774,7 +2747,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, case ISD::FP_TO_UINT: { integerPart x[2]; bool ignored; - assert(integerPartWidth >= 64); + static_assert(integerPartWidth >= 64, "APFloat parts too small!"); // FIXME need to be more flexible about rounding mode. 
APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), Opcode==ISD::FP_TO_SINT, @@ -2785,7 +2758,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstant(api, VT); } case ISD::BITCAST: - if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), VT); + else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT); else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), VT); @@ -2977,10 +2952,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, DL.getDebugLoc(), VTs, Operand); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3411,8 +3383,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Perform trivial constant folding. - SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); - if (SV.getNode()) return SV; + if (SDValue SV = + FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode())) + return SV; // Canonicalize constant to RHS if commutative. if (N1C && !N2C && isCommutativeBinOp(Opcode)) { @@ -3421,6 +3394,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Constant fold FP operations. 
+ bool HasFPExceptions = TLI->hasFloatingPointExceptions(); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode()); ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode()); if (N1CFP) { @@ -3434,28 +3408,32 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, switch (Opcode) { case ISD::FADD: s = V1.add(V2, APFloat::rmNearestTiesToEven); - if (s != APFloat::opInvalidOp) + if (!HasFPExceptions || s != APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FSUB: s = V1.subtract(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp) + if (!HasFPExceptions || s!=APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FMUL: s = V1.multiply(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp) + if (!HasFPExceptions || s!=APFloat::opInvalidOp) return getConstantFP(V1, VT); break; case ISD::FDIV: s = V1.divide(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + if (!HasFPExceptions || (s!=APFloat::opInvalidOp && + s!=APFloat::opDivByZero)) { return getConstantFP(V1, VT); + } break; case ISD::FREM : s = V1.mod(V2, APFloat::rmNearestTiesToEven); - if (s!=APFloat::opInvalidOp && s!=APFloat::opDivByZero) + if (!HasFPExceptions || (s!=APFloat::opInvalidOp && + s!=APFloat::opDivByZero)) { return getConstantFP(V1, VT); + } break; case ISD::FCOPYSIGN: V1.copySign(V2); @@ -3572,10 +3550,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3679,10 +3654,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, DL.getDebugLoc(), VTs, N1, N2, N3); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -3848,7 +3820,7 @@ static bool 
FindOptimalMemOpLowering(std::vector<EVT> &MemOps, if (VT == MVT::Other) { unsigned AS = 0; if (DstAlign >= TLI.getDataLayout()->getPointerPrefAlignment(AS) || - TLI.allowsUnalignedMemoryAccesses(VT, AS)) { + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { VT = TLI.getPointerTy(); } else { switch (DstAlign & 7) { @@ -3908,7 +3880,7 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned AS = 0; if (NumMemOps && AllowOverlap && VTSize >= 8 && NewVTSize < Size && - TLI.allowsUnalignedMemoryAccesses(VT, AS, &Fast) && Fast) + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast) VTSize = Size; else { VT = NewVT; @@ -3972,7 +3944,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, // Don't promote to an alignment that would require dynamic stack // realignment. - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) @@ -4028,7 +4000,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, - MinAlign(SrcAlign, SrcOff)); + false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), VT, isVol, @@ -4248,9 +4220,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memcpy with target-specific // code. If the target chooses to do this, this is the next best. 
SDValue Result = - TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, - isVol, AlwaysInline, - DstPtrInfo, SrcPtrInfo); + TSI->EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align, + isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -4269,8 +4240,6 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, // beyond the given memory regions. But fixing this isn't easy, and most // people don't care. - const TargetLowering *TLI = TM.getTargetLowering(); - // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -4316,17 +4285,14 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = - TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstPtrInfo, SrcPtrInfo); + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. - const TargetLowering *TLI = TM.getTargetLowering(); - // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -4371,31 +4337,22 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, // Then check to see if we should lower the memset with target-specific // code. If the target chooses to do this, this is the next best. - SDValue Result = - TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol, - DstPtrInfo); + SDValue Result = TSI->EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, + Size, Align, isVol, DstPtrInfo); if (Result.getNode()) return Result; // Emit a library call. 
- const TargetLowering *TLI = TM.getTargetLowering(); Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; Args.push_back(Entry); - // Extend or truncate the argument to be an i32 value for the call. - if (Src.getValueType().bitsGT(MVT::i32)) - Src = getNode(ISD::TRUNCATE, dl, MVT::i32, Src); - else - Src = getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Src); Entry.Node = Src; - Entry.Ty = Type::getInt32Ty(*getContext()); - Entry.isSExt = true; + Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); Args.push_back(Entry); Entry.Node = Size; Entry.Ty = IntPtrTy; - Entry.isSExt = false; Args.push_back(Entry); // FIXME: pass in SDLoc @@ -4442,7 +4399,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SuccessOrdering, FailureOrdering, SynchScope); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4587,7 +4544,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, - bool ReadMem, bool WriteMem) { + bool ReadMem, bool WriteMem, unsigned Size) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -4599,8 +4556,10 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, Flags |= MachineMemOperand::MOLoad; if (Vol) Flags |= MachineMemOperand::MOVolatile; + if (!Size) + Size = MemVT.getStoreSize(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align); + MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } @@ -4639,7 +4598,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, dl.getDebugLoc(), VTList, Ops, MemVT, MMO); } - AllNodes.push_back(N); + InsertNode(N); return 
SDValue(N, 0); } @@ -4683,7 +4642,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, - unsigned Alignment, const MDNode *TBAAInfo, + unsigned Alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -4706,7 +4665,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, - TBAAInfo, Ranges); + AAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -4755,7 +4714,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, dl.getDebugLoc(), VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4764,12 +4723,12 @@ SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, - const MDNode *TBAAInfo, + const AAMDNodes &AAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, - TBAAInfo, Ranges); + AAInfo, Ranges); } SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, @@ -4784,11 +4743,12 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isInvariant, unsigned Alignment, + const AAMDNodes &AAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, 
isNonTemporal, false, Alignment, - TBAAInfo); + PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant, + Alignment, AAInfo); } @@ -4815,7 +4775,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + unsigned Alignment, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4834,7 +4794,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, Val.getValueType().getStoreSize(), Alignment, - TBAAInfo); + AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -4862,7 +4822,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, dl.getDebugLoc(), VTs, ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4870,7 +4830,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, - const MDNode *TBAAInfo) { + const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); if (Alignment == 0) // Ensure that codegen never sees alignment 0 @@ -4888,7 +4848,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, - TBAAInfo); + AAInfo); return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } @@ -4931,7 +4891,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, dl.getDebugLoc(), VTs, ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); - 
AllNodes.push_back(N); + InsertNode(N); return SDValue(N, 0); } @@ -4958,7 +4918,61 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, ST->getMemoryVT(), ST->getMemOperand()); CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue Src0, + MachineMemOperand *MMO) { + + SDVTList VTs = getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, VTs, + VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, SDValue Mask, MachineMemOperand *MMO) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = getVTList(MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Val }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(), + 
dl.getDebugLoc(), Ops, 4, + VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); return SDValue(N, 0); } @@ -5037,10 +5051,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, VTs, Ops); } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } @@ -5120,15 +5131,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, VTList, Ops); } } - AllNodes.push_back(N); -#ifndef NDEBUG - VerifySDNode(N); -#endif + InsertNode(N); return SDValue(N, 0); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, ArrayRef<SDValue>()); + return getNode(Opcode, DL, VTList, None); } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, @@ -5510,6 +5518,10 @@ SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { /// node, and because it doesn't require CSE recalculation for any of /// the node's users. /// +/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG. +/// As a consequence it isn't appropriate to use from within the DAG combiner or +/// the legalizer which maintain worklists that would need to be updated when +/// deleting things. SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef<SDValue> Ops) { unsigned NumOps = Ops.size(); @@ -5576,10 +5588,9 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // new operands. 
if (!DeadNodeSet.empty()) { SmallVector<SDNode *, 16> DeadNodes; - for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(), - E = DeadNodeSet.end(); I != E; ++I) - if ((*I)->use_empty()) - DeadNodes.push_back(*I); + for (SDNode *N : DeadNodeSet) + if (N->use_empty()) + DeadNodes.push_back(N); RemoveDeadNodes(DeadNodes); } @@ -5748,10 +5759,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, if (DoCSE) CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); -#ifndef NDEBUG - VerifyMachineNode(N); -#endif + InsertNode(N); return N; } @@ -5797,26 +5805,24 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, /// getDbgValue - Creates a SDDbgValue node. /// /// SDNode -SDDbgValue * -SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, - bool IsIndirect, uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, N, R, IsIndirect, Off, DL, O); +SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, + unsigned R, bool IsIndirect, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); } /// Constant -SDDbgValue * -SelectionDAG::getConstantDbgValue(MDNode *MDPtr, const Value *C, - uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O); +SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, + const Value *C, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, C, Off, DL, O); } /// FrameIndex -SDDbgValue * -SelectionDAG::getFrameIndexDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off, - DebugLoc DL, unsigned O) { - return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O); +SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, + unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { + return new (Allocator) SDDbgValue(Var, Expr, FI, Off, DL, O); } namespace { @@ -6205,9 +6211,11 @@ 
unsigned SelectionDAG::AssignTopologicalOrder() { /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { - DbgInfo->add(DB, SD, isParameter); - if (SD) + if (SD) { + assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue()); SD->setHasDebugValue(true); + } + DbgInfo->add(DB, SD, isParameter); } /// TransferDbgValues - Transfer SDDbgValues. @@ -6222,10 +6230,10 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { I != E; ++I) { SDDbgValue *Dbg = *I; if (Dbg->getKind() == SDDbgValue::SDNODE) { - SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(), - Dbg->isIndirect(), - Dbg->getOffset(), Dbg->getDebugLoc(), - Dbg->getOrder()); + SDDbgValue *Clone = + getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode, + To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(), + Dbg->getDebugLoc(), Dbg->getOrder()); ClonedDVs.push_back(Clone); } } @@ -6263,7 +6271,10 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(isNonTemporal() == MMO->isNonTemporal() && "Non-temporal encoding error!"); - assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); + // We check here that the size of the memory operand fits within the size of + // the MMO. This is because the MMO might indicate only a possible address + // range instead of specifying the affected memory addresses precisely. 
+ assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); } MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, @@ -6273,7 +6284,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); - assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); + assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); } /// Profile - Gather unique data for the node. @@ -6417,7 +6428,7 @@ bool SDNode::hasPredecessor(const SDNode *N) const { bool SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSet<const SDNode *, 32> &Visited, + SmallPtrSetImpl<const SDNode *> &Visited, SmallVectorImpl<const SDNode *> &Worklist) const { if (Visited.empty()) { Worklist.push_back(this); @@ -6433,7 +6444,7 @@ SDNode::hasPredecessorHelper(const SDNode *N, const SDNode *M = Worklist.pop_back_val(); for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { SDNode *Op = M->getOperand(i).getNode(); - if (Visited.insert(Op)) + if (Visited.insert(Op).second) Worklist.push_back(Op); if (Op == N) return true; @@ -6473,7 +6484,6 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. 
- const TargetLowering *TLI = TM.getTargetLowering(); EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, @@ -6544,16 +6554,29 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } - // Handle X+C - if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && - cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) - return true; - + // Handle X + C. + if (isBaseWithConstantOffset(Loc)) { + int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); + if (Loc.getOperand(0) == BaseLoc) { + // If the base location is a simple address with no offset itself, then + // the second load's first add operand should be the base address. + if (LocOffset == Dist * (int)Bytes) + return true; + } else if (isBaseWithConstantOffset(BaseLoc)) { + // The base location itself has an offset, so subtract that value from the + // second load's offset before comparing to distance * size. + int64_t BOffset = + cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue(); + if (Loc.getOperand(0) == BaseLoc.getOperand(0)) { + if ((LocOffset - BOffset) == Dist * (int)Bytes) + return true; + } + } + } const GlobalValue *GV1 = nullptr; const GlobalValue *GV2 = nullptr; int64_t Offset1 = 0; int64_t Offset2 = 0; - const TargetLowering *TLI = TM.getTargetLowering(); bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) @@ -6568,7 +6591,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. 
const GlobalValue *GV; int64_t GVOffset = 0; - const TargetLowering *TLI = TM.getTargetLowering(); if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); @@ -6795,8 +6817,8 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, - SmallPtrSet<const SDNode*, 32> &Visited, - SmallPtrSet<const SDNode*, 32> &Checked, + SmallPtrSetImpl<const SDNode*> &Visited, + SmallPtrSetImpl<const SDNode*> &Checked, const llvm::SelectionDAG *DAG) { // If this node has already been checked, don't check it again. if (Checked.count(N)) @@ -6804,7 +6826,7 @@ static void checkForCyclesHelper(const SDNode *N, // If a node has already been visited on this depth-first walk, reject it as // a cycle. - if (!Visited.insert(N)) { + if (!Visited.insert(N).second) { errs() << "Detected cycle in SelectionDAG\n"; dbgs() << "Offending node:\n"; N->dumprFull(DAG); dbgs() << "\n"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c07b5e6a7362..86a63eea7c2a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" @@ -46,6 +47,8 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -58,6 +61,7 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include 
"llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <algorithm> using namespace llvm; @@ -563,6 +567,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, } else if (NumParts > 0) { // If the intermediate type was expanded, split each the value into // legal parts. + assert(NumIntermediates != 0 && "division by zero"); assert(NumParts % NumIntermediates == 0 && "Must expand into a divisible number of parts!"); unsigned Factor = NumParts / NumIntermediates; @@ -645,8 +650,10 @@ namespace { /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, - SDValue &Chain, SDValue *Flag, const Value *V) const; + void + getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, + SDValue *Flag, const Value *V, + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node /// operand list. This adds the code marker, matching input operand index @@ -761,9 +768,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, - SDValue &Chain, SDValue *Flag, - const Value *V) const { + SDValue &Chain, SDValue *Flag, const Value *V, + ISD::NodeType PreferredExtendType) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ISD::NodeType ExtendKind = PreferredExtendType; // Get the list of the values's legal parts. 
unsigned NumRegs = Regs.size(); @@ -772,8 +780,9 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); MVT RegisterVT = RegVTs[Value]; - ISD::NodeType ExtendKind = - TLI.isZExtFree(Val, RegisterVT)? ISD::ZERO_EXTEND: ISD::ANY_EXTEND; + + if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) + ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], NumParts, RegisterVT, V, ExtendKind); @@ -860,7 +869,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, AA = &aa; GFI = gfi; LibInfo = li; - DL = DAG.getTarget().getDataLayout(); + DL = DAG.getSubtarget().getDataLayout(); Context = DAG.getContext(); LPadToCallSiteMap.clear(); } @@ -879,6 +888,7 @@ void SelectionDAGBuilder::clear() { CurInst = nullptr; HasTailCall = false; SDNodeOrder = LowestSDNodeOrder; + StatepointLowering.clear(); } /// clearDanglingDebugInfo - Clear the dangling debug information @@ -988,15 +998,16 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DebugLoc dl = DDI.getdl(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); MDNode *Variable = DI->getVariable(); + MDNode *Expr = DI->getExpression(); uint64_t Offset = DI->getOffset(); // A dbg.value for an alloca is always indirect. 
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; SDDbgValue *SDV; if (Val.getNode()) { - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, Val)) { - SDV = DAG.getDbgValue(Variable, Val.getNode(), - Val.getResNo(), IsIndirect, - Offset, dl, DbgSDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, + Val)) { + SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), + IsIndirect, Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } } else @@ -1018,8 +1029,8 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), - InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, + V->getType()); SDValue Chain = DAG.getEntryNode(); N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); resolveDanglingDebugInfo(V, N); @@ -1050,10 +1061,10 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. 
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const Constant *C = dyn_cast<Constant>(V)) { - EVT VT = TLI->getValueType(V->getType(), true); + EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) return DAG.getConstant(*CI, VT); @@ -1063,7 +1074,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (isa<ConstantPointerNull>(C)) { unsigned AS = V->getType()->getPointerAddressSpace(); - return DAG.getConstant(0, TLI->getPointerTy(AS)); + return DAG.getConstant(0, TLI.getPointerTy(AS)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) @@ -1117,7 +1128,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, C->getType(), ValueVTs); + ComputeValueVTs(TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1149,7 +1160,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); - EVT EltVT = TLI->getValueType(VecTy->getElementType()); + EVT EltVT = TLI.getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1169,13 +1180,13 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); + return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); } // If this is an instruction which fast-isel has deferred, select it now. 
if (const Instruction *Inst = dyn_cast<Instruction>(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -1184,7 +1195,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Chain = getControlRoot(); SmallVector<ISD::OutputArg, 8> Outs; SmallVector<SDValue, 8> OutVals; @@ -1197,7 +1208,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector<EVT, 1> PtrValueVTs; - ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1205,7 +1216,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); @@ -1224,28 +1235,33 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { MVT::Other, Chains); } else if (I.getNumOperands() != 0) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType 
ExtendKind = ISD::ANY_EXTEND; + const Function *F = I.getParent()->getParent(); + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + LLVMContext &Context = F->getContext(); + bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::InReg); - const Function *F = I.getParent()->getParent(); - if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; + for (unsigned j = 0; j != NumValues; ++j) { + EVT VT = ValueVTs[j]; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind); - unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI.getNumRegisters(Context, VT); + MVT PartVT = TLI.getRegisterType(Context, VT); SmallVector<SDValue, 4> Parts(NumParts); getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -1253,8 +1269,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, - Attribute::InReg)) + if (RetInReg) Flags.setInReg(); // Propagate extension type if any @@ -1275,9 +1290,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = 
TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, - Outs, OutVals, getCurSDLoc(), - DAG); + Chain = DAG.getTargetLoweringInfo().LowerReturn( + Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1400,7 +1414,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); } else { - Condition = ISD::SETEQ; // silence warning. + (void)Condition; // silence warning. llvm_unreachable("Unknown compare instruction"); } @@ -1601,10 +1615,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { - if (!TM.getTargetLowering()->isJumpExpensive() && - BOp->hasOneUse() && - (BOp->getOpcode() == Instruction::And || - BOp->getOpcode() == Instruction::Or)) { + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && + BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || + BOp->getOpcode() == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), getEdgeWeight(BrMBB, Succ1MBB)); @@ -1724,7 +1737,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); - EVT PTy = TM.getTargetLowering()->getPointerTy(); + EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); @@ -1752,10 +1765,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // can be used as an index into the jump table in a subsequent basic block. 
// This value may be smaller or larger than the target's pointer type, and // therefore require extension or truncating. - const TargetLowering *TLI = TM.getTargetLowering(); - SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); - unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1763,12 +1776,10 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the largest // case in the switch. - SDValue CMP = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, - DAG.getConstant(JTH.Last - JTH.First,VT), - ISD::SETUGT); + SDValue CMP = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1799,8 +1810,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineBasicBlock *ParentBB) { // First create the loads to the guard/stack slot for the comparison. 
- const TargetLowering *TLI = TM.getTargetLowering(); - EVT PtrTy = TLI->getPointerTy(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PtrTy = TLI.getPointerTy(); MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); int FI = MFI->getStackProtectorIndex(); @@ -1810,10 +1821,22 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); - SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), - GuardPtr, MachinePointerInfo(IRGuard, 0), - true, false, false, Align); + TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + + SDValue Guard; + + // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the + // guard value from the virtual register holding the value. Otherwise, emit a + // volatile load to retrieve the stack guard value. + unsigned GuardReg = SPD.getGuardReg(); + + if (GuardReg && TLI.useLoadStackGuardNode()) + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, + PtrTy); + else + Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), StackSlotPtr, @@ -1824,11 +1847,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, EVT VT = Guard.getValueType(); SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); - SDValue Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(0, VT), - ISD::SETNE); + SDValue Cmp = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, VT), ISD::SETNE); // If the sub is not 0, then we know the guard/stackslot do not equal, so // branch to failure MBB. 
@@ -1853,10 +1875,10 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, /// StackProtectorDescriptor. void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { - const TargetLowering *TLI = TM.getTargetLowering(); - SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, - MVT::isVoid, nullptr, 0, false, - getCurSDLoc(), false, false).second; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Chain = + TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, + nullptr, 0, false, getCurSDLoc(), false, false).second; DAG.setRoot(Chain); } @@ -1871,16 +1893,15 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, DAG.getConstant(B.First, VT)); // Check range - const TargetLowering *TLI = TM.getTargetLowering(); - SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue RangeCmp = + DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), Sub.getValueType()), - Sub, DAG.getConstant(B.Range, VT), - ISD::SETUGT); + Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); // Determine the type of the test operands. 
bool UsePtrType = false; - if (!TLI->isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT)) UsePtrType = true; else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) @@ -1892,7 +1913,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, } } if (UsePtrType) { - VT = TLI->getPointerTy(); + VT = TLI.getPointerTy(); Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); } @@ -1936,22 +1957,18 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, Reg, VT); SDValue Cmp; unsigned PopCount = CountPopulation_64(B.Mask); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it // would need to be to shift a 1 bit in that position. - Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - ShiftOp, - DAG.getConstant(countTrailingZeros(B.Mask), VT), - ISD::SETEQ); + Cmp = DAG.getSetCC( + getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); } else if (PopCount == BB.Range) { // There is only one zero bit in the range, test for it directly. 
- Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - ShiftOp, - DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), - ISD::SETNE); + Cmp = DAG.getSetCC( + getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, + DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); } else { // Make desired shift SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, @@ -1961,9 +1978,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), VT, SwitchVal, DAG.getConstant(B.Mask, VT)); Cmp = DAG.getSetCC(getCurSDLoc(), - TLI->getSetCCResultType(*DAG.getContext(), VT), - AndOp, DAG.getConstant(0, VT), - ISD::SETNE); + TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, + DAG.getConstant(0, VT), ISD::SETNE); } // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. @@ -2001,8 +2017,17 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { if (isa<InlineAsm>(Callee)) visitInlineAsm(&I); else if (Fn && Fn->isIntrinsic()) { - assert(Fn->getIntrinsicID() == Intrinsic::donothing); - // Ignore invokes to @llvm.donothing: jump directly to the next BB. + switch (Fn->getIntrinsicID()) { + default: + llvm_unreachable("Cannot invoke this intrinsic"); + case Intrinsic::donothing: + // Ignore invokes to @llvm.donothing: jump directly to the next BB. + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + visitPatchpoint(&I, LandingPad); + break; + } } else LowerCallTo(&I, getValue(Callee), false, LandingPad); @@ -2034,26 +2059,30 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. 
- const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getExceptionPointerRegister() == 0 && - TLI->getExceptionSelectorRegister() == 0) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.getExceptionPointerRegister() == 0 && + TLI.getExceptionSelectorRegister() == 0) return; SmallVector<EVT, 2> ValueVTs; - ComputeValueVTs(*TLI, LP.getType(), ValueVTs); + ComputeValueVTs(TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; - Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), - getCurSDLoc(), ValueVTs[0]); + if (FuncInfo.ExceptionPointerVirtReg) { + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), + getCurSDLoc(), ValueVTs[0]); + } else { + Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); + } Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), - FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), - getCurSDLoc(), ValueVTs[1]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), + getCurSDLoc(), ValueVTs[1]); // Merge into one. SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), @@ -2061,6 +2090,27 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { setValue(&LP, Res); } +unsigned +SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadBB) { + SDValue Chain = getControlRoot(); + + // Get the typeid that we will dispatch on later. 
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); + unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); + SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); + Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); + + // Branch to the main landing pad block. + MachineBasicBlock *ClauseMBB = FuncInfo.MBB; + ClauseMBB->addSuccessor(LPadBB); + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, + DAG.getBasicBlock(LPadBB))); + return VReg; +} + /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, @@ -2218,9 +2268,8 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, } static inline bool areJTsAllowed(const TargetLowering &TLI) { - return TLI.supportJumpTables() && - (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || - TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); } static APInt ComputeRange(const APInt &First, const APInt &Last) { @@ -2245,8 +2294,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries())) + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2327,7 +2376,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } // Create a jump table index for this jump table. 
- unsigned JTEncoding = TLI->getJumpTableEncoding(); + unsigned JTEncoding = TLI.getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) ->createJumpTableIndex(DestBBs); @@ -2347,7 +2396,6 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock* Default, MachineBasicBlock* SwitchBB) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. @@ -2413,8 +2461,8 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, RSize -= J->size(); } - const TargetLowering *TLI = TM.getTargetLowering(); - if (areJTsAllowed(*TLI)) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (areJTsAllowed(TLI)) { // If our case is dense we *really* should handle it earlier! assert((FMetric > 0) && "Should handle dense range earlier!"); } else { @@ -2484,8 +2532,8 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, const Value* SV, MachineBasicBlock* Default, MachineBasicBlock* SwitchBB) { - const TargetLowering *TLI = TM.getTargetLowering(); - EVT PTy = TLI->getPointerTy(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PTy = TLI.getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; @@ -2496,19 +2544,18 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI->isOperationLegal(ISD::SHL, PTy)) + if (!TLI.isOperationLegal(ISD::SHL, PTy)) return false; size_t numCmps = 0; - for (CaseItr I = CR.Range.first, E = CR.Range.second; - I!=E; ++I) { + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { // Single case counts one, case range - two. numCmps += (I->Low == I->High ? 
1 : 2); } // Count unique destinations SmallSet<MachineBasicBlock*, 4> Dests; - for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { + for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { Dests.insert(I->BB); if (Dests.size() > 3) // Don't bother the code below, if there are too much unique destinations @@ -2601,25 +2648,22 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, BitTestBlock BTB(lowBound, cmpRange, SV, -1U, MVT::Other, (CR.CaseBB == SwitchBB), - CR.CaseBB, Default, BTC); + CR.CaseBB, Default, std::move(BTC)); if (CR.CaseBB == SwitchBB) visitBitTestHeader(BTB, SwitchBB); - BitTestCases.push_back(BTB); + BitTestCases.push_back(std::move(BTB)); return true; } /// Clusterify - Transform simple list of Cases into list of CaseRange's -size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { - size_t numCmps = 0; - +void SelectionDAGBuilder::Clusterify(CaseVector& Cases, + const SwitchInst& SI) { BranchProbabilityInfo *BPI = FuncInfo.BPI; - // Start with "simple" cases - for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); - i != e; ++i) { + // Start with "simple" cases. + for (SwitchInst::ConstCaseIt i : SI.cases()) { const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; @@ -2653,13 +2697,15 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, } } - for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { - if (I->Low != I->High) - // A range counts double, since it requires two compares. - ++numCmps; - } + DEBUG({ + size_t numCmps = 0; + for (auto &I : Cases) + // A range counts double, since it requires two compares. + numCmps += I.Low != I.High ? 2 : 1; - return numCmps; + dbgs() << "Clusterify finished. Total clusters: " << Cases.size() + << ". 
Total compares: " << numCmps << '\n'; + }); } void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, @@ -2680,35 +2726,58 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = nullptr; + if (SwitchMBB + 1 != FuncInfo.MF->end()) + NextBlock = SwitchMBB + 1; + + + // Create a vector of Cases, sorted so that we can efficiently create a binary + // search tree from them. + CaseVector Cases; + Clusterify(Cases, SI); + + // Get the default destination MBB. MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; - // If there is only the default destination, branch to it if it is not the - // next basic block. Otherwise, just fall through. - if (!SI.getNumCases()) { - // Update machine-CFG edges. + if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && + !Cases.empty()) { + // Replace an unreachable default destination with the most popular case + // destination. + DenseMap<const BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } - // If this is not a fall-through branch, emit the branch. + // Set new default. + assert(MaxPop > 0); + assert(MaxBB); + Default = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the default. + Cases.erase(std::remove_if(Cases.begin(), Cases.end(), + [&](const Case &C) { return C.BB == Default; }), + Cases.end()); + } + + // If there is only the default destination, go there directly. + if (Cases.empty()) { + // Update machine-CFG edges. 
SwitchMBB->addSuccessor(Default); - if (Default != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Default))); + // If this is not a fall-through branch, emit the branch. + if (Default != NextBlock) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(Default))); + } return; } - // If there are any non-default case statements, create a vector of Cases - // representing each one, and sort the vector so that we can efficiently - // create a binary search tree from them. - CaseVector Cases; - size_t numCmps = Clusterify(Cases, SI); - DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() - << ". Total compares: " << numCmps << '\n'); - (void)numCmps; - - // Get the Value to be switched on and default basic blocks, which will be - // inserted into CaseBlock records, representing basic blocks in the binary - // search tree. + // Get the Value to be switched on. const Value *SV = SI.getCondition(); // Push the initial CaseRec onto the worklist @@ -2738,7 +2807,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // Emit binary tree. We need to pick a pivot, and push left and right ranges // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. 
- handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB); + handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); } } @@ -2749,7 +2818,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { SmallSet<BasicBlock*, 32> Done; for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { BasicBlock *BB = I.getSuccessor(i); - bool Inserted = Done.insert(BB); + bool Inserted = Done.insert(BB).second; if (!Inserted) continue; @@ -2806,7 +2875,8 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = + DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -2861,8 +2931,8 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && !isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) - setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2, - getCurSDLoc(), DAG)); + setValue(&I, DAG.getTargetLoweringInfo() + .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); else setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); @@ -2878,7 +2948,7 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } @@ -2893,13 +2963,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = 
getFCmpCodeWithoutNaN(Condition); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs); + ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2926,7 +2996,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } @@ -2934,7 +3004,7 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } @@ -2942,52 +3012,51 @@ void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT DestVT = TLI->getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), - DestVT, N, - DAG.getTargetConstant(0, TLI->getPointerTy()))); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = TLI.getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, + DAG.getTargetConstant(0, TLI.getPointerTy()))); } void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op 
cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } @@ -2995,7 +3064,7 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } @@ -3003,13 +3072,13 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. 
@@ -3031,7 +3100,7 @@ void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const Value *SV = I.getOperand(0); SDValue N = getValue(SV); - EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); @@ -3049,8 +3118,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), - TM.getTargetLowering()->getValueType(I.getType()), - InVec, InVal, InIdx)); + TLI.getValueType(I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { @@ -3059,8 +3127,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy()); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), - TM.getTargetLowering()->getValueType(I.getType()), - InVec, InIdx)); + TLI.getValueType(I.getType()), InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -3082,8 +3149,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); @@ -3202,9 +3269,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else - Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), 
VT, - Src, DAG.getConstant(StartIdx[Input], - TLI->getVectorIdxTy())); + Src = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, + DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); } // Calculate new mask. @@ -3230,7 +3297,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = TLI->getVectorIdxTy(); + EVT IdxVT = TLI.getVectorIdxTy(); SmallVector<SDValue,8> Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3262,16 +3329,22 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> AggValueVTs; - ComputeValueVTs(*TLI, AggTy, AggValueVTs); + ComputeValueVTs(TLI, AggTy, AggValueVTs); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(*TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); SmallVector<SDValue, 4> Values(NumAggValues); + // Ignore an insertvalue that produces an empty object + if (!NumAggValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + SDValue Agg = getValue(Op0); unsigned i = 0; // Copy the beginning value(s) from the original aggregate. 
@@ -3302,9 +3375,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValValueVTs; - ComputeValueVTs(*TLI, ValTy, ValValueVTs); + ComputeValueVTs(TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -3353,13 +3426,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = cast<SequentialType>(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) { if (CI->isZero()) continue; uint64_t Offs = DL->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI->getPointerTy(AS); + EVT PTy = TLI.getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, @@ -3373,8 +3446,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), - DL->getTypeAllocSize(Ty)); + APInt ElementSize = + APInt(TLI.getPointerSizeInBits(AS), DL->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend @@ -3411,15 +3484,15 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. 
Type *Ty = I.getAllocatedType(); - const TargetLowering *TLI = TM.getTargetLowering(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), - I.getAlignment()); + std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI->getPointerTy(); + EVT IntPtr = TLI.getPointerTy(); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); @@ -3430,7 +3503,8 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); if (Align <= StackAlign) Align = 0; @@ -3464,15 +3538,18 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; - bool isInvariant = I.getMetadata("invariant.load") != nullptr; + bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; unsigned Alignment = I.getAlignment(); - const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TM.getTargetLowering(), Ty, 
ValueVTs, &Offsets); + ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3483,7 +3560,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Serialize volatile loads with other side effects. Root = getRoot(); else if (AA->pointsToConstantMemory( - AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) { + AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; @@ -3492,9 +3569,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getRoot(); } - const TargetLowering *TLI = TM.getTargetLowering(); if (isVolatile) - Root = TLI->prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); + Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), @@ -3520,7 +3596,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, isInvariant, Alignment, TBAAInfo, + isNonTemporal, isInvariant, Alignment, AAInfo, Ranges); Values[i] = L; @@ -3549,7 +3625,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<EVT, 4> ValueVTs; SmallVector<uint64_t, 4> Offsets; - ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets); + ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), + ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3565,9 +3642,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { NumValues)); EVT PtrVT = Ptr.getValueType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata("nontemporal") != nullptr; + bool isNonTemporal = 
I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; unsigned Alignment = I.getAlignment(); - const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -3583,7 +3662,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue St = DAG.getStore(Root, getCurSDLoc(), SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), - isVolatile, isNonTemporal, Alignment, TBAAInfo); + isVolatile, isNonTemporal, Alignment, AAInfo); Chains[ChainI] = St; } @@ -3592,28 +3671,70 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { DAG.setRoot(StoreNode); } -static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, - SynchronizationScope Scope, - bool Before, SDLoc dl, - SelectionDAG &DAG, - const TargetLowering &TLI) { - // Fence, if necessary - if (Before) { - if (Order == AcquireRelease || Order == SequentiallyConsistent) - Order = Release; - else if (Order == Acquire || Order == Monotonic || Order == Unordered) - return Chain; - } else { - if (Order == AcquireRelease) - Order = Acquire; - else if (Order == Release || Order == Monotonic || Order == Unordered) - return Chain; +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) + Value *PtrOperand = I.getArgOperand(1); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). 
+ getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, MMO); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue InChain = DAG.getRoot(); + if (AA->pointsToConstantMemory( + AliasAnalysis::Location(PtrOperand, + AA->getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + InChain = DAG.getEntryNode(); } - SDValue Ops[3]; - Ops[0] = Chain; - Ops[1] = DAG.getConstant(Order, TLI.getPointerTy()); - Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy()); - return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). 
+ getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, MMO); + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + setValue(&I, Load); } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { @@ -3624,27 +3745,16 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, - DAG, *TLI); - MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); SDValue L = DAG.getAtomicCmpSwap( ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), - 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, - TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); + /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); SDValue OutChain = L.getValue(2); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } @@ -3671,38 +3781,28 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, *TLI); - SDValue L = DAG.getAtomic(NT, dl, getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), - I.getPointerOperand(), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? 
Monotonic : Order, - Scope); + I.getPointerOperand(), + /* Alignment=*/ 0, Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { SDLoc dl = getCurSDLoc(); - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); } @@ -3713,8 +3813,8 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3728,19 +3828,14 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { I.getAlignment() ? I.getAlignment() : DAG.getEVTAlignment(VT)); - InChain = TLI->prepareVolatileOrAtomicLoad(InChain, dl, DAG); + InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), MMO, - TLI->getInsertFencesForAtomic() ? 
Monotonic : Order, - Scope); + Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); - setValue(&I, L); DAG.setRoot(OutChain); } @@ -3753,28 +3848,19 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue InChain = getRoot(); - const TargetLowering *TLI = TM.getTargetLowering(); - EVT VT = TLI->getValueType(I.getValueOperand()->getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - if (TLI->getInsertFencesForAtomic()) - InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, *TLI); - SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, InChain, getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - TLI->getInsertFencesForAtomic() ? Monotonic : Order, - Scope); - - if (TLI->getInsertFencesForAtomic()) - OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, *TLI); + Order, Scope); DAG.setRoot(OutChain); } @@ -3799,13 +3885,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; - const TargetLowering *TLI = TM.getTargetLowering(); - bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 
if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3814,7 +3900,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, I.getType(), ValueVTs); + ComputeValueVTs(TLI, I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3829,7 +3915,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, - Info.readMem, Info.writeMem); + Info.readMem, Info.writeMem, Info.size); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -3848,7 +3934,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { - EVT VT = TLI->getValueType(PTy); + EVT VT = TLI.getValueType(PTy); Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } @@ -4555,16 +4641,17 @@ static unsigned getTruncatedArgReg(const SDValue &N) { /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function /// argument, create the corresponding DBG_VALUE machine instruction for it now. /// At the end of instruction selection, they will be inserted to the entry BB. 
-bool -SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, - int64_t Offset, bool IsIndirect, - const SDValue &N) { +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, + MDNode *Variable, + MDNode *Expr, int64_t Offset, + bool IsIndirect, + const SDValue &N) { const Argument *Arg = dyn_cast<Argument>(V); if (!Arg) return false; MachineFunction &MF = DAG.getMachineFunction(); - const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); // Ignore inlined function arguments here. DIVariable DV(Variable); @@ -4610,14 +4697,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, return false; if (Op->isReg()) - FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE), - IsIndirect, - Op->getReg(), Offset, Variable)); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, Op->getReg(), Offset, Variable, Expr)); else FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) - .addOperand(*Op).addImm(Offset).addMetadata(Variable)); + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(Offset) + .addMetadata(Variable) + .addMetadata(Expr)); return true; } @@ -4635,7 +4724,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, /// otherwise lower it and return null. 
const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -4649,17 +4738,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return nullptr; case Intrinsic::vacopy: visitVACopy(I); return nullptr; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); - SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); - EVT VT = TM.getTargetLowering()->getValueType(I.getType()); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); + EVT VT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); return nullptr; } @@ -4667,15 +4757,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Value *Reg = I.getArgOperand(0); Value *RegValue = I.getArgOperand(1); SDValue Chain = getValue(RegValue).getOperand(0); - SDValue RegName = DAG.getMDNode(cast<MDNode>(Reg)); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, RegName, getValue(RegValue))); return nullptr; } case Intrinsic::setjmp: - return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return 
&"_longjmp"[!TLI->usesUnderscoreLongJmp()]; + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4736,6 +4827,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::dbg_declare: { const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); MDNode *Variable = DI.getVariable(); + MDNode *Expression = DI.getExpression(); const Value *Address = DI.getAddress(); DIVariable DIVar(Variable); assert((!DIVar || DIVar.isVariable()) && @@ -4771,16 +4863,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (FINode) // Byval parameter. We have a frame index at this point. - SDV = DAG.getFrameIndexDbgValue(Variable, FINode->getIndex(), - 0, dl, SDNodeOrder); + SDV = DAG.getFrameIndexDbgValue( + Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); else { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - EmitFuncArgumentDbgValue(Address, Variable, 0, false, N); + EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); return nullptr; } } else if (AI) - SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(), + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), true, 0, dl, SDNodeOrder); else { // Can't do anything with other non-AI cases yet. @@ -4793,7 +4885,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { // If Address is an argument then try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. - if (!EmitFuncArgumentDbgValue(Address, Variable, 0, false, N)) { + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, + N)) { // If variable is pinned by a alloca in dominating bb then // use StaticAllocaMap. 
if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { @@ -4801,7 +4894,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) { - SDV = DAG.getFrameIndexDbgValue(Variable, SI->second, + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, 0, dl, SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); return nullptr; @@ -4822,6 +4915,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; MDNode *Variable = DI.getVariable(); + MDNode *Expression = DI.getExpression(); uint64_t Offset = DI.getOffset(); const Value *V = DI.getValue(); if (!V) @@ -4829,7 +4923,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDDbgValue *SDV; if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { - SDV = DAG.getConstantDbgValue(Variable, V, Offset, dl, SDNodeOrder); + SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, + SDNodeOrder); DAG.AddDbgValue(SDV, nullptr, false); } else { // Do not use getValue() in here; we don't want to generate code at @@ -4841,10 +4936,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (N.getNode()) { // A dbg.value for an alloca is always indirect. 
bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; - if (!EmitFuncArgumentDbgValue(V, Variable, Offset, IsIndirect, N)) { - SDV = DAG.getDbgValue(Variable, N.getNode(), - N.getResNo(), IsIndirect, - Offset, dl, SDNodeOrder); + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, + IsIndirect, N)) { + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), + IsIndirect, Offset, dl, SDNodeOrder); DAG.AddDbgValue(SDV, N.getNode(), false); } } else if (!V->use_empty() ) { @@ -4878,7 +4973,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. - GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); + GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); @@ -4899,15 +4994,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::eh_dwarf_cfa: { SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, - TLI->getPointerTy()); + TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, sdl, CfaArg.getValueType(), DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, CfaArg.getValueType()), CfaArg); - SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, - TLI->getPointerTy(), - DAG.getConstant(0, TLI->getPointerTy())); + SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), + DAG.getConstant(0, TLI.getPointerTy())); setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return nullptr; @@ -4946,6 +5040,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + case Intrinsic::masked_load: + visitMaskedLoad(I); + return nullptr; + case Intrinsic::masked_store: + visitMaskedStore(I); + return nullptr; case Intrinsic::x86_mmx_pslli_w: case Intrinsic::x86_mmx_pslli_d: case 
Intrinsic::x86_mmx_pslli_q: @@ -4997,7 +5097,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), @@ -5009,14 +5109,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI->getValueType(I.getType()); - EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); + EVT DestVT = TLI.getValueType(I.getType()); + EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) * ElVT.getVectorNumElements(); - Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), - DAG.getConstant(Idx, TLI->getVectorIdxTy())); + Res = + DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, + getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), + DAG.getConstant(Idx, TLI.getVectorIdxTy())); setValue(&I, Res); return nullptr; } @@ -5024,12 +5124,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), - DAG.getConstant(Idx, TLI->getVectorIdxTy())); + 
DAG.getConstant(Idx, TLI.getVectorIdxTy())); setValue(&I, Res); return nullptr; } @@ -5055,7 +5155,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI->getValueType(I.getType()); + EVT DestVT = TLI.getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), @@ -5071,23 +5171,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(1)), DAG)); return nullptr; case Intrinsic::log: - setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log2: - setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::log10: - setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp: - setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::exp2: - setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); return nullptr; case Intrinsic::pow: setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, *TLI)); + getValue(I.getArgOperand(1)), DAG, TLI)); return nullptr; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -5119,6 +5219,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(0)))); return nullptr; } + case Intrinsic::minnum: + 
setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::maxnum: + setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; case Intrinsic::copysign: setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, getValue(I.getArgOperand(0)).getValueType(), @@ -5133,9 +5245,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { getValue(I.getArgOperand(2)))); return nullptr; case Intrinsic::fmuladd: { - EVT VT = TLI->getValueType(I.getType()); + EVT VT = TLI.getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI->isFMAFasterThanFMulAndFAdd(VT)) { + TLI.isFMAFasterThanFMulAndFAdd(VT)) { setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), @@ -5162,7 +5274,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; case Intrinsic::convert_from_fp16: setValue(&I, - DAG.getNode(ISD::FP_EXTEND, sdl, TLI->getValueType(I.getType()), + DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), DAG.getNode(ISD::BITCAST, sdl, MVT::f16, getValue(I.getArgOperand(0))))); return nullptr; @@ -5209,7 +5321,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode(ISD::STACKSAVE, sdl, - DAG.getVTList(TLI->getPointerTy(), MVT::Other), Op); + DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return nullptr; @@ -5223,9 +5335,44 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Emit code into the DAG to store the stack guard onto the stack. 
MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI->getPointerTy(); + EVT PtrTy = TLI.getPointerTy(); + SDValue Src, Chain = getRoot(); + const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); + + // See if Ptr is a bitcast. If it is, look through it and see if we can get + // global variable __stack_chk_guard. + if (!GV) + if (const Operator *BC = dyn_cast<Operator>(Ptr)) + if (BC->getOpcode() == Instruction::BitCast) + GV = dyn_cast<GlobalVariable>(BC->getOperand(0)); + + if (GV && TLI.useLoadStackGuardNode()) { + // Emit a LOAD_STACK_GUARD node. + MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, + sdl, PtrTy, Chain); + MachinePointerInfo MPInfo(GV); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); + unsigned Flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOInvariant; + *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, + PtrTy.getSizeInBits() / 8, + DAG.getEVTAlignment(PtrTy)); + Node->setMemRefs(MemRefs, MemRefs + 1); + + // Copy the guard value to a virtual register so that it can be + // retrieved in the epilogue. + Src = SDValue(Node, 0); + const TargetRegisterClass *RC = + TLI.getRegClassFor(Src.getSimpleValueType()); + unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); + + SPDescriptor.setGuardReg(Reg); + Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src); + } else { + Src = getValue(I.getArgOperand(0)); // The guard's value. + } - SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; @@ -5234,7 +5381,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. 
- Res = DAG.getStore(getRoot(), sdl, Src, FIN, + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack(FI), true, false, 0); setValue(&I, Res); @@ -5263,8 +5410,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; + case Intrinsic::assume: case Intrinsic::var_annotation: - // Discard annotate attributes + // Discard annotate attributes and assumptions return nullptr; case Intrinsic::init_trampoline: { @@ -5285,7 +5433,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::adjust_trampoline: { setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, - TLI->getPointerTy(), + TLI.getPointerTy(), getValue(I.getArgOperand(0)))); return nullptr; } @@ -5325,10 +5473,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(sdl).setChain(getRoot()) .setCallee(CallingConv::C, I.getType(), - DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), std::move(Args), 0); - std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return nullptr; } @@ -5392,11 +5540,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!LifetimeObject) continue; - int FI = FuncInfo.StaticAllocaMap[LifetimeObject]; + // First check that the Alloca is static, otherwise it won't have a + // valid frame index. 
+ auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); + if (SI == FuncInfo.StaticAllocaMap.end()) + return nullptr; + + int FI = SI->second; SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); + Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); @@ -5406,7 +5560,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); return nullptr; case Intrinsic::invariant_end: // Discard region information. @@ -5424,7 +5578,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::clear_cache: - return TLI->getClearCacheBuiltinName(); + return TLI.getClearCacheBuiltinName(); case Intrinsic::donothing: // ignore return nullptr; @@ -5434,41 +5588,85 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: { - visitPatchpoint(I); + visitPatchpoint(&I); return nullptr; } + case Intrinsic::experimental_gc_statepoint: { + visitStatepoint(I); + return nullptr; } -} - -void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, - bool isTailCall, - MachineBasicBlock *LandingPad) { - const TargetLowering *TLI = TM.getTargetLowering(); - PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); - FunctionType *FTy = cast<FunctionType>(PT->getElementType()); - Type *RetTy = FTy->getReturnType(); - MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - MCSymbol *BeginLabel = nullptr; - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Args.reserve(CS.arg_size()); + case 
Intrinsic::experimental_gc_result_int: + case Intrinsic::experimental_gc_result_float: + case Intrinsic::experimental_gc_result_ptr: { + visitGCResult(I); + return nullptr; + } + case Intrinsic::experimental_gc_relocate: { + visitGCRelocate(I); + return nullptr; + } + case Intrinsic::instrprof_increment: + llvm_unreachable("instrprof failed to lower an increment"); - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { - const Value *V = *i; + case Intrinsic::frameallocate: { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + + // Do the allocation and map it as a normal value. + // FIXME: Maybe we should add this to the alloca map so that we don't have + // to register allocate it? + uint64_t Size = cast<ConstantInt>(I.getArgOperand(0))->getZExtValue(); + int Alloc = MF.getFrameInfo()->CreateFrameAllocation(Size); + MVT PtrVT = TLI.getPointerTy(0); + SDValue FIVal = DAG.getFrameIndex(Alloc, PtrVT); + setValue(&I, FIVal); + + // Directly emit a FRAME_ALLOC machine instr. Label assignment emission is + // the same on all targets. + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::FRAME_ALLOC)) + .addSym(FrameAllocSym) + .addFrameIndex(Alloc); - // Skip empty types - if (V->getType()->isEmptyTy()) - continue; + return nullptr; + } - SDValue ArgNode = getValue(V); - Entry.Node = ArgNode; Entry.Ty = V->getType(); + case Intrinsic::framerecover: { + // i8* @llvm.framerecover(i8* %fn, i8* %fp) + MachineFunction &MF = DAG.getMachineFunction(); + MVT PtrVT = TLI.getPointerTy(0); + + // Get the symbol that defines the frame offset. 
+ Function *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName()); + + // Create a TargetExternalSymbol for the label to avoid any target lowering + // that would make this PC relative. + StringRef Name = FrameAllocSym->getName(); + assert(Name.size() == strlen(Name.data()) && "not null terminated"); + SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); + SDValue OffsetVal = + DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); + + // Add the offset to the FP. + Value *FP = I.getArgOperand(1); + SDValue FPVal = getValue(FP); + SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); + setValue(&I, Add); - // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin() + 1); - Args.push_back(Entry); + return nullptr; } + } +} + +std::pair<SDValue, SDValue> +SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + MachineBasicBlock *LandingPad) { + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MCSymbol *BeginLabel = nullptr; if (LandingPad) { // Insert a label before the invoke call to mark the try range. This can be @@ -5490,24 +5688,17 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // this call might not return. (void)getRoot(); DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); - } - // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI->LowerCallTo. 
- if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) - isTailCall = false; + CLI.setChain(getRoot()); + } - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall); + const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); - std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); - assert((isTailCall || Result.second.getNode()) && + assert((CLI.IsTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) - setValue(CS.getInstruction(), Result.first); if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -5530,6 +5721,50 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Inform MachineModuleInfo of range. MMI.addInvoke(LandingPad, BeginLabel, EndLabel); } + + return Result; +} + +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, + bool isTailCall, + MachineBasicBlock *LandingPad) { + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); + + // Skip the first return-type Attribute to get to params. 
+ Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Args.push_back(Entry); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI->LowerCallTo. + if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) + isTailCall = false; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setTailCall(isTailCall); + std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad); + + if (Result.first.getNode()) + setValue(CS.getInstruction(), Result.first); } /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the @@ -5595,7 +5830,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, SDValue Value, bool IsSigned) { - EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); if (IsSigned) Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); else @@ -5620,7 +5855,7 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { const Value *Size = I.getArgOperand(2); const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); if (CSize && CSize->getZExtValue() == 0) { - EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); setValue(&I, DAG.getConstant(0, CallVT)); return true; } @@ -5677,15 +5912,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. 
- const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (ActuallyDoIt && CSize->getZExtValue() > 4) { unsigned DstAS = LHS->getType()->getPointerAddressSpace(); unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI->isTypeLegal(LoadVT) || - !TLI->allowsUnalignedMemoryAccesses(LoadVT, SrcAS) || - !TLI->allowsUnalignedMemoryAccesses(LoadVT, DstAS)) + // TODO: Check alignment of src and dest ptrs. + if (!TLI.isTypeLegal(LoadVT) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) ActuallyDoIt = false; } @@ -5863,6 +6099,26 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, return true; } +/// visitBinaryFloatCall - If a call instruction is a binary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a binary floating-point call. + if (I.getNumArgOperands() != 2 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + I.getType() != I.getArgOperand(1)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp0 = getValue(I.getArgOperand(0)); + SDValue Tmp1 = getValue(I.getArgOperand(1)); + EVT VT = Tmp0.getValueType(); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + return true; +} + void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. 
if (isa<InlineAsm>(I.getCalledValue())) { @@ -5919,6 +6175,18 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FABS)) return; break; + case LibFunc::fmin: + case LibFunc::fminf: + case LibFunc::fminl: + if (visitBinaryFloatCall(I, ISD::FMINNUM)) + return; + break; + case LibFunc::fmax: + case LibFunc::fmaxf: + case LibFunc::fmaxl: + if (visitBinaryFloatCall(I, ISD::FMAXNUM)) + return; + break; case LibFunc::sin: case LibFunc::sinf: case LibFunc::sinl: @@ -6025,7 +6293,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol(RenameFn, - TM.getTargetLowering()->getPointerTy()); + DAG.getTargetLoweringInfo().getPointerTy()); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -6220,9 +6488,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; - const TargetLowering *TLI = TM.getTargetLowering(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI->ParseConstraints(CS); + TargetConstraints = TLI.ParseConstraints(CS); bool hasMemory = false; @@ -6247,10 +6515,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. 
assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast<StructType>(CS.getType())) { - OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI->getSimpleValueType(CS.getType()); + OpVT = TLI.getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -6271,8 +6539,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, DL). - getSimpleVT(); + OpVT = + OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -6283,7 +6551,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType - CType = TLI->getConstraintType(OpInfo.Codes[j]); + CType = TLI.getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -6315,10 +6583,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair<unsigned, const TargetRegisterClass*> MatchRC = - TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT); std::pair<unsigned, const TargetRegisterClass*> InputRC = - TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, + TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || @@ -6332,7 +6600,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. 
- TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) @@ -6360,16 +6628,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), - TLI->getPointerTy()); + TLI.getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), @@ -6387,7 +6655,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -6398,7 +6666,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. 
if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6406,7 +6674,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI->getPointerTy())); + TLI.getPointerTy())); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6429,7 +6697,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI->ComputeConstraintToUse(OpInfo, SDValue()); + TLI.ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily @@ -6447,7 +6715,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, - TLI->getPointerTy())); + TLI.getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6469,7 +6737,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. 
unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6549,7 +6817,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { - if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { LLVMContext &Ctx = *DAG.getContext(); @@ -6576,7 +6844,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6588,7 +6856,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; - TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { LLVMContext &Ctx = *DAG.getContext(); @@ -6602,20 +6870,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI->getPointerTy() && + 
assert(InOperandVal.getValueType() == TLI.getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI->getPointerTy())); + TLI.getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6678,7 +6946,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI->getValueType(CS.getType()); + EVT ResultType = TLI.getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6743,9 +7011,9 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) { } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const TargetLowering *TLI = TM.getTargetLowering(); - const DataLayout &DL = *TLI->getDataLayout(); - SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = *TLI.getDataLayout(); + SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), DL.getABITypeAlignment(I.getType())); @@ -6777,18 +7045,19 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { /// convention or require stack pointer adjustment. Only a subset of the /// intrinsic's operands need to participate in the calling convention. 
std::pair<SDValue, SDValue> -SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, +SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - bool useVoidTy) { + bool UseVoidTy, + MachineBasicBlock *LandingPad, + bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); // Populate the argument list. // Attributes for args start at offset 1, after the return attribute. - ImmutableCallSite CS(&CI); for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; ArgI != ArgE; ++ArgI) { - const Value *V = CI.getOperand(ArgI); + const Value *V = CS->getOperand(ArgI); assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); @@ -6799,14 +7068,13 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, Args.push_back(Entry); } - Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) - .setDiscardResult(!CI.use_empty()); + .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) + .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); - const TargetLowering *TLI = TM.getTargetLowering(); - return TLI->LowerCallTo(CLI); + return lowerInvokable(CLI, LandingPad); } /// \brief Add a stack map intrinsic call's live variable operands to a stackmap @@ -6826,11 +7094,11 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. 
-static void addStackMapLiveVars(const CallInst &CI, unsigned StartIdx, +static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { - for (unsigned i = StartIdx, e = CI.getNumArgOperands(); i != e; ++i) { - SDValue OpVal = Builder.getValue(CI.getArgOperand(i)); + for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { + SDValue OpVal = Builder.getValue(CS.getArgument(i)); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { Ops.push_back( Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); @@ -6881,7 +7149,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(CI, 2, Ops, *this); + addStackMapLiveVars(&CI, 2, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. @@ -6908,7 +7176,8 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { } /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
-void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { +void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, + MachineBasicBlock *LandingPad) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, // i8* <target>, @@ -6916,32 +7185,29 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // [Args...], // [live variables...]) - CallingConv::ID CC = CI.getCallingConv(); - bool isAnyRegCC = CC == CallingConv::AnyReg; - bool hasDef = !CI.getType()->isVoidTy(); - SDValue Callee = getValue(CI.getOperand(2)); // <target> + CallingConv::ID CC = CS.getCallingConv(); + bool IsAnyRegCC = CC == CallingConv::AnyReg; + bool HasDef = !CS->getType()->isVoidTy(); + SDValue Callee = getValue(CS->getOperand(2)); // <target> // Get the real number of arguments participating in the call <numArgs> - SDValue NArgVal = getValue(CI.getArgOperand(PatchPointOpers::NArgPos)); + SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> // Intrinsics include all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; - assert(CI.getNumArgOperands() >= NumMetaOpers + NumArgs && + assert(CS.arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. - unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; std::pair<SDValue, SDValue> Result = - LowerCallOperands(CI, NumMetaOpers, NumCallArgs, Callee, isAnyRegCC); - - // Set the root to the target-lowered call chain. 
- SDValue Chain = Result.second; - DAG.setRoot(Chain); + lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, + LandingPad, true); - SDNode *CallEnd = Chain.getNode(); - if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + SDNode *CallEnd = Result.second.getNode(); + if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) CallEnd = CallEnd->getOperand(0).getNode(); /// Get a call instruction from the call sequence chain. @@ -6949,16 +7215,16 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "Expected a callseq node."); SDNode *Call = CallEnd->getOperand(0).getNode(); - bool hasGlue = Call->getGluedNode(); + bool HasGlue = Call->getGluedNode(); // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; // Add the <id> and <numBytes> constants. - SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); + SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); - SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); + SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); @@ -6971,8 +7237,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Adjust <numArgs> to account for any arguments that have been passed on the // stack instead. // Call Node: Chain, Target, {Args}, RegMask, [Glue] - unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); - NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); + NumCallRegArgs = IsAnyRegCC ? 
NumArgs : NumCallRegArgs; Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); // Add the calling convention @@ -6980,20 +7246,20 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { // Add the arguments we omitted previously. The register allocator should // place these in any free register. - if (isAnyRegCC) + if (IsAnyRegCC) for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) - Ops.push_back(getValue(CI.getArgOperand(i))); + Ops.push_back(getValue(CS.getArgument(i))); // Push the arguments from the call instruction up to the register mask. - SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; + SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) Ops.push_back(*i); // Push live variables for the stack map. - addStackMapLiveVars(CI, NumMetaOpers + NumArgs, Ops, *this); + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); // Push the register mask info. - if (hasGlue) + if (HasGlue) Ops.push_back(*(Call->op_end()-2)); else Ops.push_back(*(Call->op_end()-1)); @@ -7003,15 +7269,15 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { Ops.push_back(*(Call->op_begin())); // Push the glue flag (last operand). - if (hasGlue) + if (HasGlue) Ops.push_back(*(Call->op_end()-1)); SDVTList NodeTys; - if (isAnyRegCC && hasDef) { + if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 3> ValueVTs; - ComputeValueVTs(TLI, CI.getType(), ValueVTs); + ComputeValueVTs(TLI, CS->getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end @@ -7026,18 +7292,18 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { getCurSDLoc(), NodeTys, Ops); // Update the NodeMap. 
- if (hasDef) { - if (isAnyRegCC) - setValue(&CI, SDValue(MN, 0)); + if (HasDef) { + if (IsAnyRegCC) + setValue(CS.getInstruction(), SDValue(MN, 0)); else - setValue(&CI, Result.first); + setValue(CS.getInstruction(), Result.first); } // Fixup the consumers of the intrinsic. The chain and glue may be used in the // call sequence. Furthermore the location of the chain and glue can change // when the AnyReg calling convention is used and the intrinsic returns a // value. - if (isAnyRegCC && hasDef) { + if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); @@ -7186,8 +7452,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } if (Args[i].isNest) Flags.setNest(); - if (NeedsRegBlock) + if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } Flags.setOrigAlign(OriginalAlignment); MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); @@ -7233,10 +7502,6 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (j != 0) MyFlags.Flags.setOrigAlign(1); - // Only mark the end at the last register of the last value. 
- if (NeedsRegBlock && Value == NumValues - 1 && j == NumParts - 1) - MyFlags.Flags.setInConsecutiveRegsLast(); - CLI.Outs.push_back(MyFlags); CLI.OutVals.push_back(Parts[j]); } @@ -7349,10 +7614,15 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - const TargetLowering *TLI = TM.getTargetLowering(); - RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V); + + ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == + FuncInfo.PreferredExtendType.end()) + ? ISD::ANY_EXTEND + : FuncInfo.PreferredExtendType[V]; + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } @@ -7378,15 +7648,13 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; SDLoc dl = SDB->getCurSDLoc(); - const TargetLowering *TLI = getTargetLowering(); const DataLayout *DL = TLI->getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(*getTargetLowering(), - PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. 
@@ -7451,8 +7719,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); - if (NeedsRegBlock) + if (NeedsRegBlock) { Flags.setInConsecutiveRegs(); + if (Value == NumValues - 1) + Flags.setInConsecutiveRegsLast(); + } Flags.setOrigAlign(OriginalAlignment); MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); @@ -7465,11 +7736,6 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // if it isn't first piece, alignment must be 1 else if (i > 0) MyFlags.Flags.setOrigAlign(1); - - // Only mark the end at the last register of the last value. - if (NeedsRegBlock && Value == NumValues - 1 && i == NumRegs - 1) - MyFlags.Flags.setInConsecutiveRegsLast(); - Ins.push_back(MyFlags); } PartBase += VT.getStoreSize(); @@ -7478,9 +7744,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Call the target to set up the argument values. SmallVector<SDValue, 8> InVals; - SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), - F.isVarArg(), Ins, - dl, DAG, InVals); + SDValue NewRoot = TLI->LowerFormalArguments( + DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && @@ -7517,8 +7782,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; - NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), - SRetReg, ArgValue); + NewRoot = + SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. 
@@ -7633,7 +7898,8 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // If this terminator has multiple identical successors (common for // switches), only handle each succ once. - if (!SuccsHandled.insert(SuccMBB)) continue; + if (!SuccsHandled.insert(SuccMBB).second) + continue; MachineBasicBlock::iterator MBBI = SuccMBB->begin(); @@ -7676,11 +7942,11 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector<EVT, 4> ValueVTs; - const TargetLowering *TLI = TM.getTargetLowering(); - ComputeValueVTs(*TLI, PN->getType(), ValueVTs); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ComputeValueVTs(TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT); + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; @@ -7697,6 +7963,7 @@ MachineBasicBlock * SelectionDAGBuilder::StackProtectorDescriptor:: AddSuccessorMBB(const BasicBlock *BB, MachineBasicBlock *ParentMBB, + bool IsLikely, MachineBasicBlock *SuccMBB) { // If SuccBB has not been created yet, create it. if (!SuccMBB) { @@ -7706,6 +7973,7 @@ AddSuccessorMBB(const BasicBlock *BB, MF->insert(++BBI, SuccMBB); } // Add it as a successor of ParentMBB. 
- ParentMBB->addSuccessor(SuccMBB); + ParentMBB->addSuccessor( + SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); return SuccMBB; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 84679f98d84e..eba98b8086b7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -11,9 +11,10 @@ // //===----------------------------------------------------------------------===// -#ifndef SELECTIONDAGBUILDER_H -#define SELECTIONDAGBUILDER_H +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H +#include "StatepointLowering.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -21,6 +22,7 @@ #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLowering.h" #include <vector> namespace llvm { @@ -114,6 +116,10 @@ public: /// get simple disambiguation between loads without worrying about alias /// analysis. SmallVector<SDValue, 8> PendingLoads; + + /// State used while lowering a statepoint sequence (gc_statepoint, + /// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details. 
+ StatepointLoweringState StatepointLowering; private: /// PendingExports - CopyToReg nodes that copy values to virtual registers @@ -200,7 +206,7 @@ private: } }; - size_t Clusterify(CaseVector &Cases, const SwitchInst &SI); + void Clusterify(CaseVector &Cases, const SwitchInst &SI); /// CaseBlock - This structure is used to communicate between /// SelectionDAGBuilder and SDISel for the code generation of additional basic @@ -276,9 +282,9 @@ private: BitTestBlock(APInt F, APInt R, const Value* SV, unsigned Rg, MVT RgVT, bool E, MachineBasicBlock* P, MachineBasicBlock* D, - const BitTestInfo& C): + BitTestInfo C): First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), - Parent(P), Default(D), Cases(C) { } + Parent(P), Default(D), Cases(std::move(C)) { } APInt First; APInt Range; const Value *SValue; @@ -397,7 +403,8 @@ private: class StackProtectorDescriptor { public: StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr), - FailureMBB(nullptr), Guard(nullptr) { } + FailureMBB(nullptr), Guard(nullptr), + GuardReg(0) { } ~StackProtectorDescriptor() { } /// Returns true if all fields of the stack protector descriptor are @@ -415,8 +422,8 @@ private: assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " "already initialized!"); ParentMBB = MBB; - SuccessMBB = AddSuccessorMBB(BB, MBB); - FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB); + SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); + FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); if (!Guard) Guard = StackProtCheckCall.getArgOperand(0); } @@ -455,6 +462,9 @@ private: MachineBasicBlock *getFailureMBB() { return FailureMBB; } const Value *getGuard() { return Guard; } + unsigned getGuardReg() const { return GuardReg; } + void setGuardReg(unsigned R) { GuardReg = R; } + private: /// The basic block for which we are generating the stack protector. /// @@ -477,11 +487,15 @@ private: /// stack protector stack slot. 
const Value *Guard; + /// The virtual register holding the stack guard value. + unsigned GuardReg; + /// Add a successor machine basic block to ParentMBB. If the successor mbb /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic - /// block will be created. + /// block will be created. Assign a large weight if IsLikely is true. MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, MachineBasicBlock *ParentMBB, + bool IsLikely, MachineBasicBlock *SuccMBB = nullptr); }; @@ -604,6 +618,13 @@ public: N = NewN; } + void removeValue(const Value *V) { + // This is to support hack in lowerCallFromStatepoint + // Should be removed when hack is resolved + if (NodeMap.count(V)) + NodeMap.erase(V); + } + void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; assert(!N.getNode() && "Already set a value for this node!"); @@ -626,17 +647,24 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = nullptr); - std::pair<SDValue, SDValue> LowerCallOperands(const CallInst &CI, - unsigned ArgIdx, - unsigned NumArgs, - SDValue Callee, - bool useVoidTy = false); + std::pair<SDValue, SDValue> lowerCallOperands( + ImmutableCallSite CS, + unsigned ArgIdx, + unsigned NumArgs, + SDValue Callee, + bool UseVoidTy = false, + MachineBasicBlock *LandingPad = nullptr, + bool IsPatchPoint = false); /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that need to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); private: + std::pair<SDValue, SDValue> lowerInvokable( + TargetLowering::CallLoweringInfo &CLI, + MachineBasicBlock *LandingPad); + // Terminator instructions. 
void visitRet(const ReturnInst &I); void visitBr(const BranchInst &I); @@ -658,7 +686,6 @@ private: bool handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock* Default, MachineBasicBlock *SwitchBB); bool handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, @@ -686,6 +713,8 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); + unsigned visitLandingPadClauseBB(GlobalValue *ClauseGV, + MachineBasicBlock *LPadMBB); private: // These all get lowered before this pass. @@ -743,6 +772,8 @@ private: void visitAlloca(const AllocaInst &I); void visitLoad(const LoadInst &I); void visitStore(const StoreInst &I); + void visitMaskedLoad(const CallInst &I); + void visitMaskedStore(const CallInst &I); void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); void visitAtomicRMW(const AtomicRMWInst &I); void visitFence(const FenceInst &I); @@ -755,6 +786,7 @@ private: bool visitStrLenCall(const CallInst &I); bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); + bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); @@ -767,7 +799,13 @@ private: void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); - void visitPatchpoint(const CallInst &I); + void visitPatchpoint(ImmutableCallSite CS, + MachineBasicBlock *LandingPad = nullptr); + + // These three are implemented in StatepointLowering.cpp + void visitStatepoint(const CallInst &I); + void visitGCRelocate(const CallInst &I); + void visitGCResult(const CallInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -784,7 +822,7 @@ private: /// EmitFuncArgumentDbgValue - If V is an function argument then create /// corresponding 
DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. - bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, + bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MDNode *Expr, int64_t Offset, bool IsIndirect, const SDValue &N); }; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index a71cc6859ea0..e8577d898c2d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -27,6 +27,7 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; std::string SDNode::getOperationName(const SelectionDAG *G) const { @@ -36,7 +37,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { return "<<Unknown DAG Node>>"; if (isMachineOpcode()) { if (G) - if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo()) + if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) return TII->getName(getMachineOpcode()); return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; @@ -140,6 +141,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { // Unary operators case ISD::FABS: return "fabs"; + case ISD::FMINNUM: return "fminnum"; + case ISD::FMAXNUM: return "fmaxnum"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; @@ -266,6 +269,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { // Other operators case ISD::LOAD: return "load"; case ISD::STORE: return "store"; + case ISD::MLOAD: return "masked_load"; + case ISD::MSTORE: return "masked_store"; case ISD::VAARG: return "vaarg"; case ISD::VACOPY: return "vacopy"; case ISD::VAEND: return "vaend"; @@ -433,7 +438,8 @@ void 
SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << LBB->getName() << " "; OS << (const void*)BBDN->getBasicBlock() << ">"; } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { - OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :nullptr); + OS << ' ' << PrintReg(R->getReg(), + G ? G->getSubtarget().getRegisterInfo() : nullptr); } else if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(this)) { OS << "'" << ES->getSymbol() << "'"; @@ -565,7 +571,7 @@ void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, const SelectionDAG *G, VisitedSDNodeSet &once) { - if (!once.insert(N)) // If we've been here before, return now. + if (!once.insert(N).second) // If we've been here before, return now. return; // Dump the current SDNode, but don't end the line yet. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 57e22e21c371..4f031d3ff7e7 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -40,6 +41,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -181,6 +183,10 @@ UseMBPI("use-mbpi", cl::init(true), cl::Hidden); #ifndef NDEBUG +static cl::opt<std::string> +FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, + cl::desc("Only display the basic block whose 
name " + "matches this for all view-*-dags options")); static cl::opt<bool> ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, cl::desc("Pop up a window to show dags before the first " @@ -284,8 +290,8 @@ namespace llvm { /// for the target. ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetLowering *TLI = IS->getTargetLowering(); - const TargetSubtargetInfo &ST = IS->TM.getSubtarget<TargetSubtargetInfo>(); + const TargetLowering *TLI = IS->TLI; + const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || TLI->getSchedulingPreference() == Sched::Source) @@ -336,7 +342,7 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), - FuncInfo(new FunctionLoweringInfo(TM)), + FuncInfo(new FunctionLoweringInfo()), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), @@ -411,32 +417,32 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { "-fast-isel-abort requires -fast-isel"); const Function &Fn = *mf.getFunction(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - const TargetLowering *TLI = TM.getTargetLowering(); - MF = &mf; - RegInfo = &MF->getRegInfo(); - AA = &getAnalysis<AliasAnalysis>(); - LibInfo = &getAnalysis<TargetLibraryInfo>(); - GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; - - TargetSubtargetInfo &ST = - const_cast<TargetSubtargetInfo&>(TM.getSubtarget<TargetSubtargetInfo>()); - ST.resetSubtargetFeatures(MF); - TM.resetTargetOptions(MF); + // Reset the target options before resetting the optimization + // level below. 
+ // FIXME: This is a horrible hack and should be processed via + // codegen looking at the optimization level explicitly when + // it wants to look at it. + TM.resetTargetOptions(Fn); // Reset OptLevel to None for optnone functions. CodeGenOpt::Level NewOptLevel = OptLevel; if (Fn.hasFnAttribute(Attribute::OptimizeNone)) NewOptLevel = CodeGenOpt::None; OptLevelChanger OLC(*this, NewOptLevel); + TII = MF->getSubtarget().getInstrInfo(); + TLI = MF->getSubtarget().getTargetLowering(); + RegInfo = &MF->getRegInfo(); + AA = &getAnalysis<AliasAnalysis>(); + LibInfo = &getAnalysis<TargetLibraryInfo>(); + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this); - CurDAG->init(*MF, TLI); + CurDAG->init(*MF); FuncInfo->set(Fn, *MF, CurDAG); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -454,7 +460,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // copied into vregs, emit the copies into the top of the block before // emitting the code for the block. MachineBasicBlock *EntryMBB = MF->begin(); - RegInfo->EmitLiveInCopies(EntryMBB, TRI, TII); + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); DenseMap<unsigned, unsigned> LiveInMap; if (!FuncInfo->ArgDbgValues.empty()) @@ -489,15 +496,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; - const MDNode *Variable = - MI->getOperand(MI->getNumOperands()-1).getMetadata(); + const MDNode *Variable = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); bool IsIndirect = MI->isIndirectDebugValue(); unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. 
BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, - LDI->second, Offset, Variable); + TII->get(TargetOpcode::DBG_VALUE), IsIndirect, LDI->second, Offset, + Variable, Expr); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -516,11 +522,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { } if (CopyUseMI) { MachineInstr *NewMI = - BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE), - IsIndirect, - CopyUseMI->getOperand(0).getReg(), - Offset, Variable); + BuildMI(*MF, CopyUseMI->getDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -534,7 +538,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { break; for (const auto &MI : MBB) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(MI.getOpcode()); + const MCInstrDesc &MCID = TII->get(MI.getOpcode()); if ((MCID.isCall() && !MCID.isReturn()) || MI.isStackAligningInlineAsm()) { MFI->setHasCalls(true); @@ -617,7 +621,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { SDNode *N = Worklist.pop_back_val(); // If we've already seen this node, ignore it. - if (!VisitedNodes.insert(N)) + if (!VisitedNodes.insert(N).second) continue; // Otherwise, add all chain operands to the worklist. 
@@ -652,6 +656,12 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { std::string BlockName; int BlockNumber = -1; (void)BlockNumber; + bool MatchFilterBB = false; (void)MatchFilterBB; +#ifndef NDEBUG + MatchFilterBB = (!FilterDAGBasicBlockName.empty() && + FilterDAGBasicBlockName == + FuncInfo->MBB->getBasicBlock()->getName().str()); +#endif #ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || @@ -665,7 +675,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); + if (ViewDAGCombine1 && MatchFilterBB) + CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. { @@ -678,8 +689,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Second step, hack on the DAG until it only uses operations and types that // the target supports. - if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " + - BlockName); + if (ViewLegalizeTypesDAGs && MatchFilterBB) + CurDAG->viewGraph("legalize-types input for " + BlockName); bool Changed; { @@ -693,7 +704,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->NewNodesMustHaveLegalTypes = true; if (Changed) { - if (ViewDAGCombineLT) + if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. @@ -719,7 +730,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->LegalizeTypes(); } - if (ViewDAGCombineLT) + if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. 
@@ -733,7 +744,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } - if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); + if (ViewLegalizeDAGs && MatchFilterBB) + CurDAG->viewGraph("legalize input for " + BlockName); { NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); @@ -743,7 +755,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); + if (ViewDAGCombine2 && MatchFilterBB) + CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. { @@ -757,7 +770,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); - if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); + if (ViewISelDAGs && MatchFilterBB) + CurDAG->viewGraph("isel input for " + BlockName); // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. @@ -769,7 +783,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); - if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); + if (ViewSchedDAGs && MatchFilterBB) + CurDAG->viewGraph("scheduler input for " + BlockName); // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); @@ -779,7 +794,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Scheduler->Run(CurDAG, FuncInfo->MBB); } - if (ViewSUnitDAGs) Scheduler->viewGraph(); + if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. 
@@ -894,6 +909,8 @@ void SelectionDAGISel::DoInstructionSelection() { void SelectionDAGISel::PrepareEHLandingPad() { MachineBasicBlock *MBB = FuncInfo->MBB; + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. MCSymbol *Label = MF->getMMI().addLandingPad(MBB); @@ -901,13 +918,70 @@ void SelectionDAGISel::PrepareEHLandingPad() { // Assign the call site to the landing pad's begin label. MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); - const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); + const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) .addSym(Label); + if (TM.getMCAsmInfo()->getExceptionHandlingType() == + ExceptionHandling::MSVC) { + // Make virtual registers and a series of labels that fill in values for the + // clauses. + auto &RI = MF->getRegInfo(); + FuncInfo->ExceptionSelectorVirtReg = RI.createVirtualRegister(PtrRC); + + // Get all invoke BBs that will unwind into the clause BBs. + SmallVector<MachineBasicBlock *, 4> InvokeBBs(MBB->pred_begin(), + MBB->pred_end()); + + // Emit separate machine basic blocks with separate labels for each clause + // before the main landing pad block. + const BasicBlock *LLVMBB = MBB->getBasicBlock(); + const LandingPadInst *LPadInst = LLVMBB->getLandingPadInst(); + MachineInstrBuilder SelectorPHI = BuildMI( + *MBB, MBB->begin(), SDB->getCurDebugLoc(), TII->get(TargetOpcode::PHI), + FuncInfo->ExceptionSelectorVirtReg); + for (unsigned I = 0, E = LPadInst->getNumClauses(); I != E; ++I) { + MachineBasicBlock *ClauseBB = MF->CreateMachineBasicBlock(LLVMBB); + MF->insert(MBB, ClauseBB); + + // Add the edge from the invoke to the clause. 
+ for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->addSuccessor(ClauseBB); + + // Mark the clause as a landing pad or MI passes will delete it. + ClauseBB->setIsLandingPad(); + + GlobalValue *ClauseGV = ExtractTypeInfo(LPadInst->getClause(I)); + + // Start the BB with a label. + MCSymbol *ClauseLabel = MF->getMMI().addClauseForLandingPad(MBB); + BuildMI(*ClauseBB, ClauseBB->begin(), SDB->getCurDebugLoc(), II) + .addSym(ClauseLabel); + + // Construct a simple BB that defines a register with the typeid constant. + FuncInfo->MBB = ClauseBB; + FuncInfo->InsertPt = ClauseBB->end(); + unsigned VReg = SDB->visitLandingPadClauseBB(ClauseGV, MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Add the typeid virtual register to the phi in the main landing pad. + SelectorPHI.addReg(VReg).addMBB(ClauseBB); + } + + // Remove the edge from the invoke to the lpad. + for (MachineBasicBlock *InvokeBB : InvokeBBs) + InvokeBB->removeSuccessor(MBB); + + // Restore FuncInfo back to its previous state and select the main landing + // pad block. + FuncInfo->MBB = MBB; + FuncInfo->InsertPt = MBB->end(); + return; + } + // Mark exception register as live in. - const TargetLowering *TLI = getTargetLowering(); - const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); @@ -1042,7 +1116,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = nullptr; if (TM.Options.EnableFastISel) - FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); + FastIS = TLI->createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. 
ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -1096,7 +1170,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { ++NumEntryBlocks; // Lower any arguments needed in this block if this is the entry block. - if (!FastIS->LowerArguments()) { + if (!FastIS->lowerArguments()) { // Fast isel failed to lower these arguments ++NumFastIselFailLowerArguments; if (EnableFastISelAbortArgs) @@ -1134,7 +1208,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(Inst)) { + if (FastIS->selectInstruction(Inst)) { --NumFastIselRemaining; ++NumFastIselSuccess; // If fast isel succeeded, skip over all the folded instructions, and @@ -1729,7 +1803,7 @@ static SDNode *findGlueUse(SDNode *N) { /// This function recursively traverses up the operand chain, ignoring /// certain nodes. static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, - SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited, + SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited, bool IgnoreChains) { // The NodeID's are given uniques ID's where a node ID is guaranteed to be // greater than all of its (recursive) operands. If we scan to a point where @@ -1744,7 +1818,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, // Don't revisit nodes if we already scanned it and didn't fail, we know we // won't fail if we scan it again. 
- if (!Visited.insert(Use)) + if (!Visited.insert(Use).second) return false; for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { @@ -1861,8 +1935,8 @@ SDNode SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(0)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - unsigned Reg = getTargetLowering()->getRegisterByName( - RegStr->getString().data(), Op->getValueType(0)); + unsigned Reg = + TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0)); SDValue New = CurDAG->getCopyFromReg( CurDAG->getEntryNode(), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -1874,8 +1948,8 @@ SDNode SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - unsigned Reg = getTargetLowering()->getRegisterByName( - RegStr->getString().data(), Op->getOperand(2).getValueType()); + unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), + Op->getOperand(2).getValueType()); SDValue New = CurDAG->getCopyToReg( CurDAG->getEntryNode(), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -2375,7 +2449,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Result = !::CheckOpcode(Table, Index, N.getNode()); return Index; case SelectionDAGISel::OPC_CheckType: - Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering()); + Result = !::CheckType(Table, Index, N, SDISel.TLI); return Index; case SelectionDAGISel::OPC_CheckChild0Type: case SelectionDAGISel::OPC_CheckChild1Type: @@ -2385,14 +2459,15 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, case SelectionDAGISel::OPC_CheckChild5Type: case SelectionDAGISel::OPC_CheckChild6Type: case SelectionDAGISel::OPC_CheckChild7Type: - Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(), - Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type); + Result = !::CheckChildType(Table, Index, N, SDISel.TLI, + Table[Index - 1] - 
+ SelectionDAGISel::OPC_CheckChild0Type); return Index; case SelectionDAGISel::OPC_CheckCondCode: Result = !::CheckCondCode(Table, Index, N); return Index; case SelectionDAGISel::OPC_CheckValueType: - Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering()); + Result = !::CheckValueType(Table, Index, N, SDISel.TLI); return Index; case SelectionDAGISel::OPC_CheckInteger: Result = !::CheckInteger(Table, Index, N); @@ -2436,6 +2511,42 @@ struct MatchScope { bool HasChainNodesMatched, HasGlueResultNodesMatched; }; +/// \\brief A DAG update listener to keep the matching state +/// (i.e. RecordedNodes and MatchScope) uptodate if the target is allowed to +/// change the DAG while matching. X86 addressing mode matcher is an example +/// for this. +class MatchStateUpdater : public SelectionDAG::DAGUpdateListener +{ + SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes; + SmallVectorImpl<MatchScope> &MatchScopes; +public: + MatchStateUpdater(SelectionDAG &DAG, + SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN, + SmallVectorImpl<MatchScope> &MS) : + SelectionDAG::DAGUpdateListener(DAG), + RecordedNodes(RN), MatchScopes(MS) { } + + void NodeDeleted(SDNode *N, SDNode *E) { + // Some early-returns here to avoid the search if we deleted the node or + // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we + // do, so it's unnecessary to update matching state at that point). + // Neither of these can occur currently because we only install this + // update listener during matching a complex patterns. + if (!E || E->isMachineOpcode()) + return; + // Performing linear search here does not matter because we almost never + // run this code. You'd have to have a CSE during complex pattern + // matching. 
+ for (auto &I : RecordedNodes) + if (I.first.getNode() == N) + I.first.setNode(E); + + for (auto &I : MatchScopes) + for (auto &J : I.NodeStack) + if (J.getNode() == N) + J.setNode(E); + } +}; } SDNode *SelectionDAGISel:: @@ -2449,8 +2560,6 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case ISD::BasicBlock: case ISD::Register: case ISD::RegisterMask: - //case ISD::VALUETYPE: - //case ISD::CONDCODE: case ISD::HANDLENODE: case ISD::MDNODE_SDNODE: case ISD::TargetConstant: @@ -2692,6 +2801,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, unsigned CPNum = MatcherTable[MatcherIndex++]; unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + + // If target can modify DAG during matching, keep the matching state + // consistent. + std::unique_ptr<MatchStateUpdater> MSU; + if (ComplexPatternFuncMutatesDAG()) + MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes, + MatchScopes)); + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, RecordedNodes[RecNo].first, CPNum, RecordedNodes)) @@ -2703,7 +2820,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, continue; case OPC_CheckType: - if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering())) + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break; continue; @@ -2751,7 +2868,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = getTargetLowering()->getPointerTy(); + CaseVT = TLI->getPointerTy(); // If the VT matches, then we will execute this case. 
if (CurNodeVT == CaseVT) @@ -2773,7 +2890,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild2Type: case OPC_CheckChild3Type: case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: - if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(), + if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, Opcode-OPC_CheckChild0Type)) break; continue; @@ -2781,7 +2898,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering())) + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; continue; case OPC_CheckInteger: @@ -2980,7 +3097,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; - if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy; + if (VT == MVT::iPTR) + VT = TLI->getPointerTy().SimpleTy; VTs.push_back(VT); } @@ -3076,7 +3194,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (EmitNodeInfo & OPFL_MemRefs) { // Only attach load or store memory operands if the generated // instruction may load or store. 
- const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc); + const MCInstrDesc &MCID = TII->get(TargetOpc); bool mayLoad = MCID.mayLoad(); bool mayStore = MCID.mayStore(); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp new file mode 100644 index 000000000000..33c20d3f2195 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -0,0 +1,684 @@ +//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes support code used by SelectionDAGBuilder when lowering a +// statepoint sequence in SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#include "StatepointLowering.h" +#include "SelectionDAGBuilder.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/Target/TargetLowering.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "statepoint-lowering" + +STATISTIC(NumSlotsAllocatedForStatepoints, + "Number of stack slots allocated for statepoints"); +STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); +STATISTIC(StatepointMaxSlotsRequired, + "Maximum number of stack slots required for a singe statepoint"); + +void +StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { + // Consistency check + assert(PendingGCRelocateCalls.empty() && + 
"Trying to visit statepoint before finished processing previous one"); + Locations.clear(); + RelocLocations.clear(); + NextSlotToAllocate = 0; + // Need to resize this on each safepoint - we need the two to stay in + // sync and the clear patterns of a SelectionDAGBuilder have no relation + // to FunctionLoweringInfo. + AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size()); + for (size_t i = 0; i < AllocatedStackSlots.size(); i++) { + AllocatedStackSlots[i] = false; + } +} +void StatepointLoweringState::clear() { + Locations.clear(); + RelocLocations.clear(); + AllocatedStackSlots.clear(); + assert(PendingGCRelocateCalls.empty() && + "cleared before statepoint sequence completed"); +} + +SDValue +StatepointLoweringState::allocateStackSlot(EVT ValueType, + SelectionDAGBuilder &Builder) { + + NumSlotsAllocatedForStatepoints++; + + // The basic scheme here is to first look for a previously created stack slot + // which is not in use (accounting for the fact arbitrary slots may already + // be reserved), or to create a new stack slot and use it. 
+ + // If this doesn't succeed in 40000 iterations, something is seriously wrong + for (int i = 0; i < 40000; i++) { + assert(Builder.FuncInfo.StatepointStackSlots.size() == + AllocatedStackSlots.size() && + "broken invariant"); + const size_t NumSlots = AllocatedStackSlots.size(); + assert(NextSlotToAllocate <= NumSlots && "broken invariant"); + + if (NextSlotToAllocate >= NumSlots) { + assert(NextSlotToAllocate == NumSlots); + // record stats + if (NumSlots + 1 > StatepointMaxSlotsRequired) { + StatepointMaxSlotsRequired = NumSlots + 1; + } + + SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); + const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + Builder.FuncInfo.StatepointStackSlots.push_back(FI); + AllocatedStackSlots.push_back(true); + return SpillSlot; + } + if (!AllocatedStackSlots[NextSlotToAllocate]) { + const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; + AllocatedStackSlots[NextSlotToAllocate] = true; + return Builder.DAG.getFrameIndex(FI, ValueType); + } + // Note: We deliberately choose to advance this only on the failing path. + // Doing so on the succeeding path involves a bit of complexity that caused a + // minor bug previously. Unless performance shows this matters, please + // keep this code as simple as possible. + NextSlotToAllocate++; + } + llvm_unreachable("infinite loop?"); +} + +/// Try to find existing copies of the incoming values in stack slots used for +/// statepoint spilling. If we can find a spill slot for the incoming value, +/// mark that slot as allocated, and reuse the same slot for this safepoint. +/// This helps to avoid series of loads and stores that only serve to reshuffle +/// values on the stack between calls. 
+static void reservePreviousStackSlotForValue(SDValue Incoming, + SelectionDAGBuilder &Builder) { + + if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) { + // We won't need to spill this, so no need to check for previously + // allocated stack slots + return; + } + + SDValue Loc = Builder.StatepointLowering.getLocation(Incoming); + if (Loc.getNode()) { + // duplicates in input + return; + } + + // Search back for the load from a stack slot pattern to find the original + // slot we allocated for this value. We could extend this to deal with + // simple modification patterns, but simple dealing with trivial load/store + // sequences helps a lot already. + if (LoadSDNode *Load = dyn_cast<LoadSDNode>(Incoming)) { + if (auto *FI = dyn_cast<FrameIndexSDNode>(Load->getBasePtr())) { + const int Index = FI->getIndex(); + auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(), + Builder.FuncInfo.StatepointStackSlots.end(), Index); + if (Itr == Builder.FuncInfo.StatepointStackSlots.end()) { + // not one of the lowering stack slots, can't reuse! + // TODO: Actually, we probably could reuse the stack slot if the value + // hasn't changed at all, but we'd need to look for intervening writes + return; + } else { + // This is one of our dedicated lowering slots + const int Offset = + std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr); + if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { + // stack slot already assigned to someone else, can't use it! + // TODO: currently we reserve space for gc arguments after doing + // normal allocation for deopt arguments. We should reserve for + // _all_ deopt and gc arguments, then start allocating. This + // will prevent some moves being inserted when vm state changes, + // but gc state doesn't between two calls. 
+ return; + } + // Reserve this stack slot + Builder.StatepointLowering.reserveStackSlot(Offset); + } + + // Cache this slot so we find it when going through the normal + // assignment loop. + SDValue Loc = + Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + + Builder.StatepointLowering.setLocation(Incoming, Loc); + } + } + + // TODO: handle case where a reloaded value flows through a phi to + // another safepoint. e.g. + // bb1: + // a' = relocated... + // bb2: % pred: bb1, bb3, bb4, etc. + // a_phi = phi(a', ...) + // statepoint ... a_phi + // NOTE: This will require reasoning about cross basic block values. This is + // decidedly non trivial and this might not be the right place to do it. We + // don't really have the information we need here... + + // TODO: handle simple updates. If a value is modified and the original + // value is no longer live, it would be nice to put the modified value in the + // same slot. This allows folding of the memory accesses for some + // instructions types (like an increment). + // statepoint (i) + // i1 = i+1 + // statepoint (i1) +} + +/// Remove any duplicate (as SDValues) from the derived pointer pairs. This +/// is not required for correctness. It's purpose is to reduce the size of +/// StackMap section. It has no effect on the number of spill slots required +/// or the actual lowering. 
+static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, + SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const Value *> &Relocs, + SelectionDAGBuilder &Builder) { + + // This is horribly ineffecient, but I don't care right now + SmallSet<SDValue, 64> Seen; + + SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs; + for (size_t i = 0; i < Ptrs.size(); i++) { + SDValue SD = Builder.getValue(Ptrs[i]); + // Only add non-duplicates + if (Seen.count(SD) == 0) { + NewBases.push_back(Bases[i]); + NewPtrs.push_back(Ptrs[i]); + NewRelocs.push_back(Relocs[i]); + } + Seen.insert(SD); + } + assert(Bases.size() >= NewBases.size()); + assert(Ptrs.size() >= NewPtrs.size()); + assert(Relocs.size() >= NewRelocs.size()); + Bases = NewBases; + Ptrs = NewPtrs; + Relocs = NewRelocs; + assert(Ptrs.size() == Bases.size()); + assert(Ptrs.size() == Relocs.size()); +} + +/// Extract call from statepoint, lower it and return pointer to the +/// call node. Also update NodeMap so that getValue(statepoint) will +/// reference lowered call result +static SDNode *lowerCallFromStatepoint(const CallInst &CI, + SelectionDAGBuilder &Builder) { + + assert(Intrinsic::experimental_gc_statepoint == + dyn_cast<IntrinsicInst>(&CI)->getIntrinsicID() && + "function called must be the statepoint function"); + + ImmutableStatepoint StatepointOperands(&CI); + + // Lower the actual call itself - This is a bit of a hack, but we want to + // avoid modifying the actual lowering code. This is similiar in intent to + // the LowerCallOperands mechanism used by PATCHPOINT, but is structured + // differently. Hopefully, this is slightly more robust w.r.t. calling + // convention, return values, and other function attributes. 
+ Value *ActualCallee = const_cast<Value *>(StatepointOperands.actualCallee()); + + std::vector<Value *> Args; + CallInst::const_op_iterator arg_begin = StatepointOperands.call_args_begin(); + CallInst::const_op_iterator arg_end = StatepointOperands.call_args_end(); + Args.insert(Args.end(), arg_begin, arg_end); + // TODO: remove the creation of a new instruction! We should not be + // modifying the IR (even temporarily) at this point. + CallInst *Tmp = CallInst::Create(ActualCallee, Args); + Tmp->setTailCall(CI.isTailCall()); + Tmp->setCallingConv(CI.getCallingConv()); + Tmp->setAttributes(CI.getAttributes()); + Builder.LowerCallTo(Tmp, Builder.getValue(ActualCallee), false); + + // Handle the return value of the call iff any. + const bool HasDef = !Tmp->getType()->isVoidTy(); + if (HasDef) { + // The value of the statepoint itself will be the value of call itself. + // We'll replace the actually call node shortly. gc_result will grab + // this value. + Builder.setValue(&CI, Builder.getValue(Tmp)); + } else { + // The token value is never used from here on, just generate a poison value + Builder.setValue(&CI, Builder.DAG.getIntPtrConstant(-1)); + } + // Remove the fake entry we created so we don't have a hanging reference + // after we delete this node. + Builder.removeValue(Tmp); + delete Tmp; + Tmp = nullptr; + + // Search for the call node + // The following code is essentially reverse engineering X86's + // LowerCallTo. 
+ SDNode *CallNode = nullptr; + + // We just emitted a call, so it should be last thing generated + SDValue Chain = Builder.DAG.getRoot(); + + // Find closest CALLSEQ_END walking back through lowered nodes if needed + SDNode *CallEnd = Chain.getNode(); + int Sanity = 0; + while (CallEnd->getOpcode() != ISD::CALLSEQ_END) { + CallEnd = CallEnd->getGluedNode(); + assert(CallEnd && "Can not find call node"); + assert(Sanity < 20 && "should have found call end already"); + Sanity++; + } + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + assert(CallEnd->getGluedNode()); + + // Step back inside the CALLSEQ + CallNode = CallEnd->getGluedNode(); + return CallNode; +} + +/// Callect all gc pointers coming into statepoint intrinsic, clean them up, +/// and return two arrays: +/// Bases - base pointers incoming to this statepoint +/// Ptrs - derived pointers incoming to this statepoint +/// Relocs - the gc_relocate corresponding to each base/ptr pair +/// Elements of this arrays should be in one-to-one correspondence with each +/// other i.e Bases[i], Ptrs[i] are from the same gcrelocate call +static void +getIncomingStatepointGCValues(SmallVectorImpl<const Value *> &Bases, + SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const Value *> &Relocs, + ImmutableCallSite Statepoint, + SelectionDAGBuilder &Builder) { + // Search for relocated pointers. Note that working backwards from the + // gc_relocates ensures that we only get pairs which are actually relocated + // and used after the statepoint. 
+ // TODO: This logic should probably become a utility function in Statepoint.h + for (const User *U : cast<CallInst>(Statepoint.getInstruction())->users()) { + if (!isGCRelocate(U)) { + continue; + } + GCRelocateOperands relocateOpers(U); + Relocs.push_back(cast<Value>(U)); + Bases.push_back(relocateOpers.basePtr()); + Ptrs.push_back(relocateOpers.derivedPtr()); + } + + // Remove any redundant llvm::Values which map to the same SDValue as another + // input. Also has the effect of removing duplicates in the original + // llvm::Value input list as well. This is a useful optimization for + // reducing the size of the StackMap section. It has no other impact. + removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder); + + assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size()); +} + +/// Spill a value incoming to the statepoint. It might be either part of +/// vmstate +/// or gcstate. In both cases unconditionally spill it on the stack unless it +/// is a null constant. Return pair with first element being frame index +/// containing saved value and second element with outgoing chain from the +/// emitted store +static std::pair<SDValue, SDValue> +spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, + SelectionDAGBuilder &Builder) { + SDValue Loc = Builder.StatepointLowering.getLocation(Incoming); + + // Emit new store if we didn't do it for this ptr before + if (!Loc.getNode()) { + Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(), + Builder); + assert(isa<FrameIndexSDNode>(Loc)); + int Index = cast<FrameIndexSDNode>(Loc)->getIndex(); + // We use TargetFrameIndex so that isel will not select it into LEA + Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + + // TODO: We can create TokenFactor node instead of + // chaining stores one after another, this may allow + // a bit more optimal scheduling for them + Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, + 
MachinePointerInfo::getFixedStack(Index), + false, false, 0); + + Builder.StatepointLowering.setLocation(Incoming, Loc); + } + + assert(Loc.getNode()); + return std::make_pair(Loc, Chain); +} + +/// Lower a single value incoming to a statepoint node. This value can be +/// either a deopt value or a gc value, the handling is the same. We special +/// case constants and allocas, then fall back to spilling if required. +static void lowerIncomingStatepointValue(SDValue Incoming, + SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder &Builder) { + SDValue Chain = Builder.getRoot(); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) { + // If the original value was a constant, make sure it gets recorded as + // such in the stackmap. This is required so that the consumer can + // parse any internal format to the deopt state. It also handles null + // pointers and other constant pointers in GC states + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint + const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); + Ops.push_back( + Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); + } else { + // Otherwise, locate a spill slot and explicitly spill it so it + // can be found by the runtime later. We currently do not support + // tracking values through callee saved registers to their eventual + // spill location. This would be a useful optimization, but would + // need to be optional since it requires a lot of complexity on the + // runtime side which not all would support. 
+ std::pair<SDValue, SDValue> Res = + spillIncomingStatepointValue(Incoming, Chain, Builder); + Ops.push_back(Res.first); + Chain = Res.second; + } + + Builder.DAG.setRoot(Chain); +} + +/// Lower deopt state and gc pointer arguments of the statepoint. The actual +/// lowering is described in lowerIncomingStatepointValue. This function is +/// responsible for lowering everything in the right position and playing some +/// tricks to avoid redundant stack manipulation where possible. On +/// completion, 'Ops' will contain ready to use operands for machine code +/// statepoint. The chain nodes will have already been created and the DAG root +/// will be set to the last value spilled (if any were). +static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, + ImmutableStatepoint Statepoint, + SelectionDAGBuilder &Builder) { + + // Lower the deopt and gc arguments for this statepoint. Layout will + // be: deopt argument length, deopt arguments.., gc arguments... + + SmallVector<const Value *, 64> Bases, Ptrs, Relocations; + getIncomingStatepointGCValues(Bases, Ptrs, Relocations, + Statepoint.getCallSite(), Builder); + +#ifndef NDEBUG + // Check that each of the gc pointer and bases we've gotten out of the + // safepoint is something the strategy thinks might be a pointer into the GC + // heap. This is basically just here to help catch errors during statepoint + // insertion. TODO: This should actually be in the Verifier, but we can't get + // to the GCStrategy from there (yet). 
+ if (Builder.GFI) { + GCStrategy &S = Builder.GFI->getStrategy(); + for (const Value *V : Bases) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); + } + } + for (const Value *V : Ptrs) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); + } + } + for (const Value *V : Relocations) { + auto Opt = S.isGCManagedPointer(V); + if (Opt.hasValue()) { + assert(Opt.getValue() && "non gc managed pointer relocated"); + } + } + } +#endif + + + + // Before we actually start lowering (and allocating spill slots for values), + // reserve any stack slots which we judge to be profitable to reuse for a + // particular value. This is purely an optimization over the code below and + // doesn't change semantics at all. It is important for performance that we + // reserve slots for both deopt and gc values before lowering either. + for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end(); + I != E; ++I) { + Value *V = *I; + SDValue Incoming = Builder.getValue(V); + reservePreviousStackSlotForValue(Incoming, Builder); + } + for (unsigned i = 0; i < Bases.size() * 2; ++i) { + // Even elements will contain base, odd elements - derived ptr + const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2]; + SDValue Incoming = Builder.getValue(V); + reservePreviousStackSlotForValue(Incoming, Builder); + } + + // First, prefix the list with the number of unique values to be + // lowered. Note that this is the number of *Values* not the + // number of SDValues required to lower them. 
+ const int NumVMSArgs = Statepoint.numTotalVMSArgs(); + Ops.push_back( + Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(NumVMSArgs, MVT::i64)); + + assert(NumVMSArgs + 1 == std::distance(Statepoint.vm_state_begin(), + Statepoint.vm_state_end())); + + // The vm state arguments are lowered in an opaque manner. We do + // not know what type of values are contained within. We skip the + // first one since that happens to be the total number we lowered + // explicitly just above. We could have left it in the loop and + // not done it explicitly, but it's far easier to understand this + // way. + for (auto I = Statepoint.vm_state_begin() + 1, E = Statepoint.vm_state_end(); + I != E; ++I) { + const Value *V = *I; + SDValue Incoming = Builder.getValue(V); + lowerIncomingStatepointValue(Incoming, Ops, Builder); + } + + // Finally, go ahead and lower all the gc arguments. There's no prefixed + // length for this one. After lowering, we'll have the base and pointer + // arrays interwoven with each (lowered) base pointer immediately followed by + // it's (lowered) derived pointer. i.e + // (base[0], ptr[0], base[1], ptr[1], ...) + for (unsigned i = 0; i < Bases.size() * 2; ++i) { + // Even elements will contain base, odd elements - derived ptr + const Value *V = i % 2 ? Bases[i / 2] : Ptrs[i / 2]; + SDValue Incoming = Builder.getValue(V); + lowerIncomingStatepointValue(Incoming, Ops, Builder); + } +} +void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) { + // The basic scheme here is that information about both the original call and + // the safepoint is encoded in the CallInst. We create a temporary call and + // lower it, then reverse engineer the calling sequence. 
+ + // Check some preconditions for sanity + assert(isStatepoint(&CI) && + "function called must be the statepoint function"); + NumOfStatepoints++; + // Clear state + StatepointLowering.startNewStatepoint(*this); + +#ifndef NDEBUG + // Consistency check + for (const User *U : CI.users()) { + const CallInst *Call = cast<CallInst>(U); + if (isGCRelocate(Call)) + StatepointLowering.scheduleRelocCall(*Call); + } +#endif + + ImmutableStatepoint ISP(&CI); +#ifndef NDEBUG + // If this is a malformed statepoint, report it early to simplify debugging. + // This should catch any IR level mistake that's made when constructing or + // transforming statepoints. + ISP.verify(); + + // Check that the associated GCStrategy expects to encounter statepoints. + // TODO: This if should become an assert. For now, we allow the GCStrategy + // to be optional for backwards compatibility. This will only last a short + // period (i.e. a couple of weeks). + if (GFI) { + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); + } +#endif + + + // Lower statepoint vmstate and gcstate arguments + SmallVector<SDValue, 10> LoweredArgs; + lowerStatepointMetaArgs(LoweredArgs, ISP, *this); + + // Get call node, we will replace it later with statepoint + SDNode *CallNode = lowerCallFromStatepoint(CI, *this); + + // Construct the actual STATEPOINT node with all the appropriate arguments + // and return values. + + // TODO: Currently, all of these operands are being marked as read/write in + // PrologEpilougeInserter.cpp, we should special case the VMState arguments + // and flags to be read-only. 
+ SmallVector<SDValue, 40> Ops; + + // Calculate and push starting position of vmstate arguments + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + SDValue Glue; + if (CallNode->getGluedNode()) { + // Glue is always last operand + Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); + } + // Get number of arguments incoming directly into call node + unsigned NumCallRegArgs = + CallNode->getNumOperands() - (Glue.getNode() ? 4 : 3); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + + // Add call target + SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0); + Ops.push_back(CallTarget); + + // Add call arguments + // Get position of register mask in the call + SDNode::op_iterator RegMaskIt; + if (Glue.getNode()) + RegMaskIt = CallNode->op_end() - 2; + else + RegMaskIt = CallNode->op_end() - 1; + Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); + + // Add a leading constant argument with the Flags and the calling convention + // masked together + CallingConv::ID CallConv = CI.getCallingConv(); + int Flags = dyn_cast<ConstantInt>(CI.getArgOperand(2))->getZExtValue(); + assert(Flags == 0 && "not expected to be used"); + Ops.push_back(DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(Flags | ((unsigned)CallConv << 1), MVT::i64)); + + // Insert all vmstate and gcstate arguments + Ops.insert(Ops.end(), LoweredArgs.begin(), LoweredArgs.end()); + + // Add register mask from call node + Ops.push_back(*RegMaskIt); + + // Add chain + Ops.push_back(CallNode->getOperand(0)); + + // Same for the glue, but we add it only if original call had it + if (Glue.getNode()) + Ops.push_back(Glue); + + // Compute return values + SmallVector<EVT, 21> ValueVTs; + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); // provide a glue output since we consume one + // as input. This allows someone else to chain + // off us as needed. 
+ SDVTList NodeTys = DAG.getVTList(ValueVTs); + + SDNode *StatepointMCNode = DAG.getMachineNode(TargetOpcode::STATEPOINT, + getCurSDLoc(), NodeTys, Ops); + + // Replace original call + DAG.ReplaceAllUsesWith(CallNode, StatepointMCNode); // This may update Root + // Remove originall call node + DAG.DeleteNode(CallNode); + + // DON'T set the root - under the assumption that it's already set past the + // inserted node we created. + + // TODO: A better future implementation would be to emit a single variable + // argument, variable return value STATEPOINT node here and then hookup the + // return value of each gc.relocate to the respective output of the + // previously emitted STATEPOINT value. Unfortunately, this doesn't appear + // to actually be possible today. +} + +void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { + // The result value of the gc_result is simply the result of the actual + // call. We've already emitted this, so just grab the value. + Instruction *I = cast<Instruction>(CI.getArgOperand(0)); + assert(isStatepoint(I) && + "first argument must be a statepoint token"); + + setValue(&CI, getValue(I)); +} + +void SelectionDAGBuilder::visitGCRelocate(const CallInst &CI) { +#ifndef NDEBUG + // Consistency check + StatepointLowering.relocCallVisited(CI); +#endif + + GCRelocateOperands relocateOpers(&CI); + SDValue SD = getValue(relocateOpers.derivedPtr()); + + if (isa<ConstantSDNode>(SD) || isa<FrameIndexSDNode>(SD)) { + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingValueForStatepoint for detail. 
+ setValue(&CI, SD); + return; + } + + SDValue Loc = StatepointLowering.getRelocLocation(SD); + // Emit new load if we did not emit it before + if (!Loc.getNode()) { + SDValue SpillSlot = StatepointLowering.getLocation(SD); + int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + + // Be conservative: flush all pending loads + // TODO: Probably we can be less restrictive on this, + // it may allow more scheduling opprtunities + SDValue Chain = getRoot(); + + Loc = DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, + SpillSlot, MachinePointerInfo::getFixedStack(FI), false, + false, false, 0); + + StatepointLowering.setRelocLocation(SD, Loc); + + // Again, be conservative, don't emit pending loads + DAG.setRoot(Loc.getValue(1)); + } + + assert(Loc.getNode()); + setValue(&CI, Loc); +} diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h new file mode 100644 index 000000000000..673112cf8bb5 --- /dev/null +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -0,0 +1,138 @@ +//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes support code use by SelectionDAGBuilder when lowering a +// statepoint sequence in SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include <vector> + +namespace llvm { +class SelectionDAGBuilder; + +/// This class tracks both per-statepoint and per-selectiondag information. 
+/// For each statepoint it tracks locations of it's gc valuess (incoming and +/// relocated) and list of gcreloc calls scheduled for visiting (this is +/// used for a debug mode consistency check only). The spill slot tracking +/// works in concert with information in FunctionLoweringInfo. +class StatepointLoweringState { +public: + StatepointLoweringState() : NextSlotToAllocate(0) { + } + + /// Reset all state tracking for a newly encountered safepoint. Also + /// performs some consistency checking. + void startNewStatepoint(SelectionDAGBuilder &Builder); + + /// Clear the memory usage of this object. This is called from + /// SelectionDAGBuilder::clear. We require this is never called in the + /// midst of processing a statepoint sequence. + void clear(); + + /// Returns the spill location of a value incoming to the current + /// statepoint. Will return SDValue() if this value hasn't been + /// spilled. Otherwise, the value has already been spilled and no + /// further action is required by the caller. + SDValue getLocation(SDValue val) { + if (!Locations.count(val)) + return SDValue(); + return Locations[val]; + } + void setLocation(SDValue val, SDValue Location) { + assert(!Locations.count(val) && + "Trying to allocate already allocated location"); + Locations[val] = Location; + } + + /// Returns the relocated value for a given input pointer. Will + /// return SDValue() if this value hasn't yet been reloaded from + /// it's stack slot after the statepoint. Otherwise, the value + /// has already been reloaded and the SDValue of that reload will + /// be returned. 
Note that VMState values are spilled but not + /// reloaded (since they don't change at the safepoint unless + /// also listed in the GC pointer section) and will thus never + /// be in this map + SDValue getRelocLocation(SDValue val) { + if (!RelocLocations.count(val)) + return SDValue(); + return RelocLocations[val]; + } + void setRelocLocation(SDValue val, SDValue Location) { + assert(!RelocLocations.count(val) && + "Trying to allocate already allocated location"); + RelocLocations[val] = Location; + } + + /// Record the fact that we expect to encounter a given gc_relocate + /// before the next statepoint. If we don't see it, we'll report + /// an assertion. + void scheduleRelocCall(const CallInst &RelocCall) { + PendingGCRelocateCalls.push_back(&RelocCall); + } + /// Remove this gc_relocate from the list we're expecting to see + /// before the next statepoint. If we weren't expecting to see + /// it, we'll report an assertion. + void relocCallVisited(const CallInst &RelocCall) { + SmallVectorImpl<const CallInst *>::iterator itr = + std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(), + &RelocCall); + assert(itr != PendingGCRelocateCalls.end() && + "Visited unexpected gcrelocate call"); + PendingGCRelocateCalls.erase(itr); + } + + // TODO: Should add consistency tracking to ensure we encounter + // expected gc_result calls too. + + /// Get a stack slot we can use to store an value of type ValueType. This + /// will hopefully be a recylced slot from another statepoint. 
+ SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder); + + void reserveStackSlot(int Offset) { + assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && + "out of bounds"); + assert(!AllocatedStackSlots[Offset] && "already reserved!"); + assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!"); + AllocatedStackSlots[Offset] = true; + } + bool isStackSlotAllocated(int Offset) { + assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() && + "out of bounds"); + return AllocatedStackSlots[Offset]; + } + +private: + /// Maps pre-relocation value (gc pointer directly incoming into statepoint) + /// into it's location (currently only stack slots) + DenseMap<SDValue, SDValue> Locations; + /// Map pre-relocated value into it's new relocated location + DenseMap<SDValue, SDValue> RelocLocations; + + /// A boolean indicator for each slot listed in the FunctionInfo as to + /// whether it has been used in the current statepoint. Since we try to + /// preserve stack slots across safepoints, there can be gaps in which + /// slots have been allocated. + SmallVector<bool, 50> AllocatedStackSlots; + + /// Points just beyond the last slot known to have been allocated + unsigned NextSlotToAllocate; + + /// Keep track of pending gcrelocate calls for consistency check + SmallVector<const CallInst *, 10> PendingGCRelocateCalls; +}; +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 05ace413bfdf..72e0aca84080 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -31,13 +31,13 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <cctype> using namespace llvm; -/// NOTE: The constructor takes ownership of TLOF. 
-TargetLowering::TargetLowering(const TargetMachine &tm, - const TargetLoweringObjectFile *tlof) - : TargetLoweringBase(tm, tlof) {} +/// NOTE: The TargetMachine owns TLOF. +TargetLowering::TargetLowering(const TargetMachine &tm) + : TargetLoweringBase(tm) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; @@ -1283,36 +1283,53 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // (zext x) == C --> x == (trunc C) - if (DCI.isBeforeLegalize() && N0->hasOneUse() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // (sext x) == C --> x == (trunc C) + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + DCI.isBeforeLegalize() && N0->hasOneUse()) { unsigned MinBits = N0.getValueSizeInBits(); - SDValue PreZExt; + SDValue PreExt; + bool Signed = false; if (N0->getOpcode() == ISD::ZERO_EXTEND) { // ZExt MinBits = N0->getOperand(0).getValueSizeInBits(); - PreZExt = N0->getOperand(0); + PreExt = N0->getOperand(0); } else if (N0->getOpcode() == ISD::AND) { // DAGCombine turns costly ZExts into ANDs if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) if ((C->getAPIntValue()+1).isPowerOf2()) { MinBits = C->getAPIntValue().countTrailingOnes(); - PreZExt = N0->getOperand(0); + PreExt = N0->getOperand(0); } + } else if (N0->getOpcode() == ISD::SIGN_EXTEND) { + // SExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreExt = N0->getOperand(0); + Signed = true; } else if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(N0)) { - // ZEXTLOAD + // ZEXTLOAD / SEXTLOAD if (LN0->getExtensionType() == ISD::ZEXTLOAD) { MinBits = LN0->getMemoryVT().getSizeInBits(); - PreZExt = N0; + PreExt = N0; + } else if (LN0->getExtensionType() == ISD::SEXTLOAD) { + Signed = true; + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreExt = N0; } } + // Figure out how many bits we need to preserve this constant. + unsigned ReqdBits = Signed ? 
+ C1.getBitWidth() - C1.getNumSignBits() + 1 : + C1.getActiveBits(); + // Make sure we're not losing bits from the constant. if (MinBits > 0 && - MinBits < C1.getBitWidth() && MinBits >= C1.getActiveBits()) { + MinBits < C1.getBitWidth() && + MinBits >= ReqdBits) { EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { // Will get folded away. - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); return DAG.getSetCC(dl, VT, Trunc, C, Cond); } @@ -2177,7 +2194,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint, std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *RI = + getTargetMachine().getSubtargetImpl()->getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -2239,14 +2257,11 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( // Do a prepass over the constraints, canonicalizing them, and building up the // ConstraintOperands list. - InlineAsm::ConstraintInfoVector - ConstraintInfos = IA->ParseConstraints(); - unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. - for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) { - ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i])); + for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { + ConstraintOperands.emplace_back(std::move(CI)); AsmOperandInfo &OpInfo = ConstraintOperands.back(); // Update multiple alternative constraint count. 
@@ -2325,7 +2340,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( } // If we have multiple alternative constraints, select the best alternative. - if (ConstraintInfos.size()) { + if (ConstraintOperands.size()) { if (maCount) { unsigned bestMAIndex = 0; int bestWeight = -1; @@ -2641,11 +2656,13 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, /// \brief Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector<SDNode *> *Created) const { + assert(Created && "No vector to hold sdiv ops."); + EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2673,38 +2690,36 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, // If d > 0 and m < 0, add the numerator if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // If d < 0 and m > 0, subtract the numerator. 
if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // Shift right algebraic if shift value is nonzero if (magics.s > 0) { Q = DAG.getNode(ISD::SRA, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getScalarSizeInBits() - 1, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(T.getNode()); + Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); } /// \brief Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by -/// multiplying by a magic number. See: -/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, bool IsAfterLegalization, std::vector<SDNode *> *Created) const { + assert(Created && "No vector to hold udiv ops."); + EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2725,8 +2740,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, unsigned Shift = Divisor.countTrailingZeros(); Q = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); - if (Created) - Created->push_back(Q.getNode()); + Created->push_back(Q.getNode()); // Get magic number for the shifted divisor. 
magics = Divisor.lshr(Shift).magicu(Shift); @@ -2744,8 +2758,8 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, DAG.getConstant(magics.m, VT)).getNode(), 1); else return SDValue(); // No mulhu or equvialent - if (Created) - Created->push_back(Q.getNode()); + + Created->push_back(Q.getNode()); if (magics.a == 0) { assert(magics.s < Divisor.getBitWidth() && @@ -2754,15 +2768,12 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType()))); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); - if (Created) - Created->push_back(NPQ.getNode()); + Created->push_back(NPQ.getNode()); return DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } @@ -2785,7 +2796,7 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SelectionDAG &DAG, SDValue LL, SDValue LH, - SDValue RL, SDValue RH) const { + SDValue RL, SDValue RH) const { EVT VT = N->getValueType(0); SDLoc dl(N); @@ -2818,8 +2829,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // The inputs are both zero-extended. if (HasUMUL_LOHI) { // We can emit a umul_lohi. 
- Lo = DAG.getNode(ISD::UMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, + RL); Hi = SDValue(Lo.getNode(), 1); return true; } @@ -2834,8 +2845,8 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // The input values are both sign-extended. if (HasSMUL_LOHI) { // We can emit a smul_lohi. - Lo = DAG.getNode(ISD::SMUL_LOHI, dl, - DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, + RL); Hi = SDValue(Lo.getNode(), 1); return true; } |