Diffstat (limited to 'lib/CodeGen/SelectionDAG')
28 files changed, 5961 insertions, 3298 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 3218dce8f575..7a99687757f8 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -36,7 +36,6 @@
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineMemOperand.h"
-#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
@@ -60,6 +59,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/KnownBits.h"
+#include "llvm/Support/MachineValueType.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -122,7 +122,7 @@ namespace {
     bool LegalTypes = false;
     bool ForCodeSize;

-    /// \brief Worklist of all of the nodes that need to be simplified.
+    /// Worklist of all of the nodes that need to be simplified.
     ///
     /// This must behave as a stack -- new nodes to process are pushed onto the
     /// back and when processing we pop off of the back.
@@ -131,14 +131,14 @@ namespace {
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

-    /// \brief Mapping from an SDNode to its position on the worklist.
+    /// Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

-    /// \brief Set of nodes which have been combined (at least once).
+    /// Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
@@ -232,14 +232,25 @@ namespace {
       return SimplifyDemandedBits(Op, Demanded);
     }

+    /// Check the specified vector node value to see if it can be simplified or
+    /// if things it uses can be simplified as it only uses some of the
+    /// elements. If so, return true.
+    bool SimplifyDemandedVectorElts(SDValue Op) {
+      unsigned NumElts = Op.getValueType().getVectorNumElements();
+      APInt Demanded = APInt::getAllOnesValue(NumElts);
+      return SimplifyDemandedVectorElts(Op, Demanded);
+    }
+
     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
+    bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+                                    bool AssumeSingleUse = false);

     bool CombineToPreIndexedLoadStore(SDNode *N);
     bool CombineToPostIndexedLoadStore(SDNode *N);
     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
     bool SliceUpLoad(SDNode *N);

-    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
+    /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
     /// load.
     ///
     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
@@ -258,10 +269,6 @@ namespace {
     SDValue PromoteExtend(SDValue Op);
     bool PromoteLoad(SDValue Op);

-    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
-                         SDValue ExtLoad, const SDLoc &DL,
-                         ISD::NodeType ExtType);
-
     /// Call the node-specific routine that knows how to fold each
     /// particular type of node. If that doesn't do anything, try the
     /// target-specific DAG combines.
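
The new SimplifyDemandedVectorElts wrapper demands every lane via an all-ones APInt mask. A minimal standalone sketch of the underlying idea, in plain C++ with a uint64_t bitmask standing in for APInt (a toy model, not the patch's API):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Toy model of "demanded elements": if a use only reads some lanes
    // (bit i set => lane i is read), the remaining lanes may become undef,
    // which is what SimplifyDemandedVectorElts exploits.
    static std::vector<int> simplifyDemanded(const std::vector<int> &Lanes,
                                             uint64_t DemandedMask,
                                             int UndefMarker = -1) {
      std::vector<int> Out(Lanes.size(), UndefMarker);
      for (size_t i = 0; i < Lanes.size(); ++i)
        if (DemandedMask & (uint64_t(1) << i))
          Out[i] = Lanes[i]; // only demanded lanes must keep their value
      return Out;
    }

    int main() {
      std::vector<int> V{10, 20, 30, 40};
      // The wrapper above demands all lanes: an all-ones mask.
      uint64_t AllOnes = (uint64_t(1) << V.size()) - 1;
      assert(simplifyDemanded(V, AllOnes) == V);
      // A use reading only lanes 0 and 2 lets lanes 1 and 3 become undef.
      auto S = simplifyDemanded(V, 0b0101);
      assert(S[0] == 10 && S[2] == 30 && S[1] == -1 && S[3] == -1);
      return 0;
    }
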
@@ -292,7 +299,9 @@ namespace {
     SDValue visitMUL(SDNode *N);
     SDValue useDivRem(SDNode *N);
     SDValue visitSDIV(SDNode *N);
+    SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
     SDValue visitUDIV(SDNode *N);
+    SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
     SDValue visitREM(SDNode *N);
     SDValue visitMULHU(SDNode *N);
     SDValue visitMULHS(SDNode *N);
@@ -302,9 +311,9 @@ namespace {
     SDValue visitUMULO(SDNode *N);
     SDValue visitIMINMAX(SDNode *N);
     SDValue visitAND(SDNode *N);
-    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
     SDValue visitOR(SDNode *N);
-    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
+    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
     SDValue visitXOR(SDNode *N);
     SDValue SimplifyVBinOp(SDNode *N);
     SDValue visitSHL(SDNode *N);
@@ -323,7 +332,6 @@ namespace {
     SDValue visitVSELECT(SDNode *N);
     SDValue visitSELECT_CC(SDNode *N);
     SDValue visitSETCC(SDNode *N);
-    SDValue visitSETCCE(SDNode *N);
     SDValue visitSETCCCARRY(SDNode *N);
     SDValue visitSIGN_EXTEND(SDNode *N);
     SDValue visitZERO_EXTEND(SDNode *N);
@@ -385,8 +393,8 @@ namespace {
     SDValue visitFMULForFMADistributiveCombine(SDNode *N);

     SDValue XformToShuffleWithZero(SDNode *N);
-    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
-                           SDValue RHS);
+    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+                           SDValue N1);

     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

@@ -403,8 +411,11 @@ namespace {
                       SDValue N2, SDValue N3, ISD::CondCode CC);
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
+    SDValue unfoldMaskedMerge(SDNode *N);
+    SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
-                          const SDLoc &DL, bool foldBooleans = true);
+                          const SDLoc &DL, bool foldBooleans);
+    SDValue rebuildSetCC(SDValue N);

     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                            SDValue &CC) const;
@@ -414,20 +425,21 @@ namespace {
                               unsigned HiOp);
     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
     SDValue CombineExtLoad(SDNode *N);
+    SDValue CombineZExtLogicopShiftLoad(SDNode *N);
     SDValue combineRepeatedFPDivisors(SDNode *N);
     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
     SDValue BuildSDIV(SDNode *N);
     SDValue BuildSDIVPow2(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
-    SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
+    SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
-    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
+    SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                 SDNodeFlags Flags, bool Reciprocal);
-    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
+    SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                                 SDNodeFlags Flags, bool Reciprocal);
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
@@ -442,13 +454,14 @@ namespace {
     SDValue ReduceLoadOpStoreWidth(SDNode *N);
     SDValue splitMergedValStore(StoreSDNode *ST);
     SDValue TransformFPLoadStorePair(SDNode *N);
+    SDValue convertBuildVecZextToZext(SDNode *N);
     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
     SDValue reduceBuildVecToShuffle(SDNode *N);
     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                   ArrayRef<int> VectorMask, SDValue VecIn1,
                                   SDValue VecIn2, unsigned LeftIdx);
-    SDValue matchVSelectOpSizesWithSetCC(SDNode *N);
+    SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

     /// Walk up chain skipping non-aliasing memory nodes,
     /// looking for aliasing nodes and adding them to the Aliases vector.
@@ -500,15 +513,15 @@ namespace {
     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                           EVT LoadResultTy, EVT &ExtVT);

-    /// Helper function to calculate whether the given Load can have its
+    /// Helper function to calculate whether the given Load/Store can have its
     /// width reduced to ExtVT.
-    bool isLegalNarrowLoad(LoadSDNode *LoadN, ISD::LoadExtType ExtType,
-                           EVT &ExtVT, unsigned ShAmt = 0);
+    bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
+                           EVT &MemVT, unsigned ShAmt = 0);

     /// Used by BackwardsPropagateMask to find suitable loads.
     bool SearchForAndLoads(SDNode *N, SmallPtrSetImpl<LoadSDNode*> &Loads,
-                           SmallPtrSetImpl<SDNode*> &NodeWithConsts,
-                           ConstantSDNode *Mask, SDNode *&UncombinedNode);
+                           SmallPtrSetImpl<SDNode*> &NodesWithConsts,
+                           ConstantSDNode *Mask, SDNode *&NodeToMask);
     /// Attempt to propagate a given AND node back to load leaves so that they
     /// can be combined into narrow loads.
     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
@@ -530,23 +543,28 @@ namespace {
     /// This is a helper function for MergeConsecutiveStores. Stores
     /// that potentially may be merged with St are placed in
-    /// StoreNodes.
+    /// StoreNodes. RootNode is a chain predecessor to all store
+    /// candidates.
     void getStoreMergeCandidates(StoreSDNode *St,
-                                 SmallVectorImpl<MemOpLink> &StoreNodes);
+                                 SmallVectorImpl<MemOpLink> &StoreNodes,
+                                 SDNode *&Root);

     /// Helper function for MergeConsecutiveStores. Checks if
     /// candidate stores have indirect dependency through their
-    /// operands. \return True if safe to merge.
+    /// operands. RootNode is the predecessor to all stores calculated
+    /// by getStoreMergeCandidates and is used to prune the dependency check.
+    /// \return True if safe to merge.
     bool checkMergeStoreCandidatesForDependencies(
-        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores);
+        SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
+        SDNode *RootNode);

     /// Merge consecutive store operations into a wide store.
     /// This optimization uses wide integers or vectors when possible.
     /// \return number of stores that were merged into a merged store (the
     /// affected nodes are stored as a prefix in \p StoreNodes).
-    bool MergeConsecutiveStores(StoreSDNode *N);
+    bool MergeConsecutiveStores(StoreSDNode *St);

-    /// \brief Try to transform a truncation where C is a constant:
+    /// Try to transform a truncation where C is a constant:
     ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
     ///
     /// \p N needs to be a truncation and its first operand an AND. Other
@@ -554,6 +572,16 @@ namespace {
     /// single-use) and if missed an empty SDValue is returned.
     SDValue distributeTruncateThroughAnd(SDNode *N);

+    /// Helper function to determine whether the target supports operation
+    /// given by \p Opcode for type \p VT, that is, whether the operation
+    /// is legal or custom before legalizing operations, and whether it is
+    /// legal (but not custom) after legalization.
+    bool hasOperation(unsigned Opcode, EVT VT) {
+      if (LegalOperations)
+        return TLI.isOperationLegal(Opcode, VT);
+      return TLI.isOperationLegalOrCustom(Opcode, VT);
+    }
+
   public:
     /// Runs the dag combiner on all nodes in the work list
     void Run(CombineLevel AtLevel);
@@ -564,11 +592,7 @@ namespace {
     /// legalization these can be huge.
     EVT getShiftAmountTy(EVT LHSTy) {
       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
-      if (LHSTy.isVector())
-        return LHSTy;
-      auto &DL = DAG.getDataLayout();
-      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
-                        : TLI.getPointerTy(DL);
+      return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
     }

     /// This method returns true if we are running before type legalization or
@@ -582,6 +606,10 @@ namespace {
     EVT getSetCCResultType(EVT VT) const {
       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
     }
+
+    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
+                         SDValue OrigLoad, SDValue ExtLoad,
+                         ISD::NodeType ExtType);
   };

 /// This class is a DAGUpdateListener that removes any deleted
@@ -657,8 +685,13 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
   // fneg is removable even if it has multiple uses.
   if (Op.getOpcode() == ISD::FNEG) return 2;

-  // Don't allow anything with multiple uses.
-  if (!Op.hasOneUse()) return 0;
+  // Don't allow anything with multiple uses unless we know it is free.
+  EVT VT = Op.getValueType();
+  const SDNodeFlags Flags = Op->getFlags();
+  if (!Op.hasOneUse())
+    if (!(Op.getOpcode() == ISD::FP_EXTEND &&
+          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
+      return 0;

   // Don't recurse exponentially.
   if (Depth > 6) return 0;
@@ -671,17 +704,15 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,

     // Don't invert constant FP values after legalization unless the target says
     // the negated constant is legal.
-    EVT VT = Op.getValueType();
     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
            TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
   }
   case ISD::FADD:
-    // FIXME: determine better conditions for this xform.
-    if (!Options->UnsafeFPMath) return 0;
+    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
+      return 0;

     // After operation legalization, it might not be legal to create new FSUBs.
-    if (LegalOperations &&
-        !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType()))
+    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
       return 0;

     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
@@ -694,7 +725,7 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
   case ISD::FSUB:
     // We can't turn -(A-B) into B-A when we honor signed zeros.
     if (!Options->NoSignedZerosFPMath &&
-        !Op.getNode()->getFlags().hasNoSignedZeros())
+        !Flags.hasNoSignedZeros())
       return 0;

     // fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -702,8 +733,6 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,

   case ISD::FMUL:
   case ISD::FDIV:
-    if (Options->HonorSignDependentRoundingFPMath()) return 0;
-
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                     Options, Depth + 1))
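
The FADD case now keys on the per-node no-signed-zeros flag instead of only the global UnsafeFPMath option. A standalone sketch in plain C++ (not from the patch) of why -(A+B) -> (-A)-B needs that flag:

    #include <cassert>
    #include <cmath>

    int main() {
      // -(A + B) == (-A) - B holds for IEEE floats except for signed zeros.
      double A = 0.0, B = -0.0;
      double Folded = -(A + B);   // -(+0.0) == -0.0
      double Unfolded = (-A) - B; // (-0.0) - (-0.0) == +0.0
      // The sign bits differ, so the rewrite is only valid under nsz.
      assert(std::signbit(Folded) != std::signbit(Unfolded));
      return 0;
    }
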
@@ -727,9 +756,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
   // fneg is removable even if it has multiple uses.
   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

-  // Don't allow anything with multiple uses.
-  assert(Op.hasOneUse() && "Unknown reuse!");
-
   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

   const SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -742,8 +768,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
   }
   case ISD::FADD:
-    // FIXME: determine better conditions for this xform.
-    assert(Options.UnsafeFPMath);
+    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
@@ -769,8 +794,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,

   case ISD::FMUL:
   case ISD::FDIV:
-    assert(!Options.HonorSignDependentRoundingFPMath());
-
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
@@ -846,7 +869,13 @@ bool DAGCombiner::isOneUseSetCC(SDValue N) const {
   return false;
 }

-// \brief Returns the SDNode if it is a constant float BuildVector
+static SDValue peekThroughBitcast(SDValue V) {
+  while (V.getOpcode() == ISD::BITCAST)
+    V = V.getOperand(0);
+  return V;
+}
+
+// Returns the SDNode if it is a constant float BuildVector
 // or constant float.
 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
   if (isa<ConstantFPSDNode>(N))
@@ -880,6 +909,7 @@ static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 // constant null integer (with no undefs).
 // Build vector implicit truncation is not an issue for null values.
 static bool isNullConstantOrNullSplatConstant(SDValue N) {
+  // TODO: may want to use peekThroughBitcast() here.
   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     return Splat->isNullValue();
   return false;
@@ -889,6 +919,7 @@ static bool isNullConstantOrNullSplatConstant(SDValue N) {
 // constant integer of one (with no undefs).
 // Do not permit build vector implicit truncation.
 static bool isOneConstantOrOneSplatConstant(SDValue N) {
+  // TODO: may want to use peekThroughBitcast() here.
   unsigned BitWidth = N.getScalarValueSizeInBits();
   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
@@ -899,6 +930,7 @@ static bool isOneConstantOrOneSplatConstant(SDValue N) {
 // constant integer of all ones (with no undefs).
 // Do not permit build vector implicit truncation.
 static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
+  N = peekThroughBitcast(N);
   unsigned BitWidth = N.getScalarValueSizeInBits();
   if (ConstantSDNode *Splat = isConstOrConstSplat(N))
     return Splat->isAllOnesValue() &&
@@ -913,56 +945,6 @@ static bool isAnyConstantBuildVector(const SDNode *N) {
          ISD::isBuildVectorOfConstantFPSDNodes(N);
 }

-// Attempt to match a unary predicate against a scalar/splat constant or
-// every element of a constant BUILD_VECTOR.
-static bool matchUnaryPredicate(SDValue Op,
-                                std::function<bool(ConstantSDNode *)> Match) {
-  if (auto *Cst = dyn_cast<ConstantSDNode>(Op))
-    return Match(Cst);
-
-  if (ISD::BUILD_VECTOR != Op.getOpcode())
-    return false;
-
-  EVT SVT = Op.getValueType().getScalarType();
-  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
-    auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i));
-    if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst))
-      return false;
-  }
-  return true;
-}
-
-// Attempt to match a binary predicate against a pair of scalar/splat constants
-// or every element of a pair of constant BUILD_VECTORs.
-static bool matchBinaryPredicate(
-    SDValue LHS, SDValue RHS,
-    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) {
-  if (LHS.getValueType() != RHS.getValueType())
-    return false;
-
-  if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS))
-    if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS))
-      return Match(LHSCst, RHSCst);
-
-  if (ISD::BUILD_VECTOR != LHS.getOpcode() ||
-      ISD::BUILD_VECTOR != RHS.getOpcode())
-    return false;
-
-  EVT SVT = LHS.getValueType().getScalarType();
-  for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
-    auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i));
-    auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
-    if (!LHSCst || !RHSCst)
-      return false;
-    if (LHSCst->getValueType(0) != SVT ||
-        LHSCst->getValueType(0) != RHSCst->getValueType(0))
-      return false;
-    if (!Match(LHSCst, RHSCst))
-      return false;
-  }
-  return true;
-}
-
 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                     SDValue N1) {
   EVT VT = N0.getValueType();
@@ -1013,11 +995,9 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                                bool AddTo) {
   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
   ++NodesCombined;
-  DEBUG(dbgs() << "\nReplacing.1 ";
-        N->dump(&DAG);
-        dbgs() << "\nWith: ";
-        To[0].getNode()->dump(&DAG);
-        dbgs() << " and " << NumTo-1 << " other values\n");
+  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
+             To[0].getNode()->dump(&DAG);
+             dbgs() << " and " << NumTo - 1 << " other values\n");
   for (unsigned i = 0, e = NumTo; i != e; ++i)
     assert((!To[i].getNode() || N->getValueType(i) == To[i].getValueType()) &&
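
matchUnaryPredicate and matchBinaryPredicate are deleted here because equivalent helpers now live in the ISD namespace (see the ISD::matchBinaryPredicate and ISD::matchUnaryPredicate calls further down in this patch). A hypothetical plain-C++ sketch of the pattern they implement, accepting either a scalar constant or every element of a vector:

    #include <cassert>
    #include <functional>
    #include <variant>
    #include <vector>

    using Constant = int;
    // Scalar constant or "build_vector" of constants, as a toy stand-in.
    using Value = std::variant<Constant, std::vector<Constant>>;

    static bool matchUnaryPredicate(const Value &Op,
                                    std::function<bool(Constant)> Match) {
      if (auto *C = std::get_if<Constant>(&Op))
        return Match(*C); // scalar: test directly
      for (Constant C : std::get<std::vector<Constant>>(Op))
        if (!Match(C))    // vector: every element must satisfy the predicate
          return false;
      return true;
    }

    int main() {
      auto IsPow2 = [](Constant C) { return C > 0 && (C & (C - 1)) == 0; };
      assert(matchUnaryPredicate(Value{8}, IsPow2));
      assert(matchUnaryPredicate(Value{std::vector<Constant>{2, 4, 16}}, IsPow2));
      assert(!matchUnaryPredicate(Value{std::vector<Constant>{2, 3}}, IsPow2));
      return 0;
    }
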
@@ -1074,11 +1054,33 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {

   // Replace the old value with the new one.
   ++NodesCombined;
-  DEBUG(dbgs() << "\nReplacing.2 ";
-        TLO.Old.getNode()->dump(&DAG);
-        dbgs() << "\nWith: ";
-        TLO.New.getNode()->dump(&DAG);
-        dbgs() << '\n');
+  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+             dbgs() << '\n');
+
+  CommitTargetLoweringOpt(TLO);
+  return true;
+}
+
+/// Check the specified vector node value to see if it can be simplified or
+/// if things it uses can be simplified as it only uses some of the elements.
+/// If so, return true.
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+                                             bool AssumeSingleUse) {
+  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
+  APInt KnownUndef, KnownZero;
+  if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
+                                      0, AssumeSingleUse))
+    return false;
+
+  // Revisit the node.
+  AddToWorklist(Op.getNode());
+
+  // Replace the old value with the new one.
+  ++NodesCombined;
+  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
+             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
+             dbgs() << '\n');

   CommitTargetLoweringOpt(TLO);
   return true;
@@ -1089,11 +1091,8 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
   EVT VT = Load->getValueType(0);
   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

-  DEBUG(dbgs() << "\nReplacing.9 ";
-        Load->dump(&DAG);
-        dbgs() << "\nWith: ";
-        Trunc.getNode()->dump(&DAG);
-        dbgs() << '\n');
+  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
+             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
   WorklistRemover DeadNodes(*this);
   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
@@ -1107,10 +1106,8 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
     LoadSDNode *LD = cast<LoadSDNode>(Op);
     EVT MemVT = LD->getMemoryVT();
-    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
-      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
-                                                       : ISD::EXTLOAD)
-      : LD->getExtensionType();
+    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+                                                      : LD->getExtensionType();
     Replace = true;
     return DAG.getExtLoad(ExtType, DL, PVT,
                           LD->getChain(), LD->getBasePtr(),
@@ -1194,7 +1191,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
     assert(PVT != VT && "Don't know what type to promote to!");

-    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

     bool Replace0 = false;
     SDValue N0 = Op.getOperand(0);
@@ -1259,7 +1256,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
     assert(PVT != VT && "Don't know what type to promote to!");

-    DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
+    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

     bool Replace = false;
     SDValue N0 = Op.getOperand(0);
@@ -1311,8 +1308,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) {
     // fold (aext (aext x)) -> (aext x)
     // fold (aext (zext x)) -> (zext x)
     // fold (aext (sext x)) -> (sext x)
-    DEBUG(dbgs() << "\nPromoting ";
-          Op.getNode()->dump(&DAG));
+    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
   }
   return SDValue();
@@ -1345,20 +1341,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
   SDNode *N = Op.getNode();
   LoadSDNode *LD = cast<LoadSDNode>(N);
   EVT MemVT = LD->getMemoryVT();
-  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
-    ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
-                                                     : ISD::EXTLOAD)
-    : LD->getExtensionType();
+  ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
+                                                    : LD->getExtensionType();
   SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                  LD->getChain(), LD->getBasePtr(),
                                  MemVT, LD->getMemOperand());
   SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

-  DEBUG(dbgs() << "\nPromoting ";
-        N->dump(&DAG);
-        dbgs() << "\nTo: ";
-        Result.getNode()->dump(&DAG);
-        dbgs() << '\n');
+  LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
+             Result.getNode()->dump(&DAG); dbgs() << '\n');
   WorklistRemover DeadNodes(*this);
   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
@@ -1369,7 +1360,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
   return false;
 }

-/// \brief Recursively delete a node which has no uses and any operands for
+/// Recursively delete a node which has no uses and any operands for
 /// which it is the only use.
 ///
 /// Note that this both deletes the nodes and removes them from the worklist.
@@ -1453,7 +1444,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
       continue;
     }

-    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
+    LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

     // Add any operands of the new node which have not yet been combined to the
     // worklist as well. Because the worklist uniques things already, this
@@ -1481,8 +1472,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
            RV.getOpcode() != ISD::DELETED_NODE &&
            "Node was deleted but visit returned new node!");

-    DEBUG(dbgs() << " ... into: ";
-          RV.getNode()->dump(&DAG));
+    LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));

     if (N->getNumValues() == RV.getNode()->getNumValues())
       DAG.ReplaceAllUsesWith(N, RV.getNode());
@@ -1558,7 +1548,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
   case ISD::VSELECT:            return visitVSELECT(N);
   case ISD::SELECT_CC:          return visitSELECT_CC(N);
   case ISD::SETCC:              return visitSETCC(N);
-  case ISD::SETCCE:             return visitSETCCE(N);
   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
@@ -1708,6 +1697,10 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
       return N->getOperand(1);
   }

+  // Don't simplify token factors if optnone.
+  if (OptLevel == CodeGenOpt::None)
+    return SDValue();
+
   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
   SmallPtrSet<SDNode*, 16> SeenOps;
@@ -1893,16 +1886,16 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
           BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) &&
          "Unexpected binary operator");

-  // Bail out if any constants are opaque because we can't constant fold those.
-  SDValue C1 = BO->getOperand(1);
-  if (!isConstantOrConstantVector(C1, true) &&
-      !isConstantFPBuildVectorOrConstantFP(C1))
-    return SDValue();
-
   // Don't do this unless the old select is going away. We want to eliminate the
   // binary operator, not replace a binop with a select.
   // TODO: Handle ISD::SELECT_CC.
+  unsigned SelOpNo = 0;
   SDValue Sel = BO->getOperand(0);
+  if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
+    SelOpNo = 1;
+    Sel = BO->getOperand(1);
+  }
+
   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
     return SDValue();

@@ -1916,19 +1909,48 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
       !isConstantFPBuildVectorOrConstantFP(CF))
     return SDValue();

+  // Bail out if any constants are opaque because we can't constant fold those.
+  // The exception is "and" and "or" with either 0 or -1 in which case we can
+  // propagate non constant operands into select. I.e.:
+  // and (select Cond, 0, -1), X --> select Cond, 0, X
+  // or X, (select Cond, -1, 0) --> select Cond, -1, X
+  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+                         (isNullConstantOrNullSplatConstant(CT) ||
+                          isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
+                         (isNullConstantOrNullSplatConstant(CF) ||
+                          isAllOnesConstantOrAllOnesSplatConstant(CF));
+
+  SDValue CBO = BO->getOperand(SelOpNo ^ 1);
+  if (!CanFoldNonConst &&
+      !isConstantOrConstantVector(CBO, true) &&
+      !isConstantFPBuildVectorOrConstantFP(CBO))
+    return SDValue();
+
+  EVT VT = Sel.getValueType();
+
+  // In case of a shift, the value and the shift amount may have different VTs.
+  // For instance, on x86 the shift amount is i8 regardless of the LHS type.
+  // Bail out if we have swapped operands and the value types do not match.
+  // NB: x86 is fine if operands are not swapped with shift amount VT being not
+  // bigger than shifted value.
+  // TODO: it is possible to check for a shift operation, correct the VTs and
+  // still perform the optimization on x86 if needed.
+  if (SelOpNo && VT != CBO.getValueType())
+    return SDValue();
+
   // We have a select-of-constants followed by a binary operator with a
   // constant. Eliminate the binop by pulling the constant math into the select.
-  // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1
-  EVT VT = Sel.getValueType();
+  // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
   SDLoc DL(Sel);
-  SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1);
-  if (!NewCT.isUndef() &&
+  SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
+                          : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
+  if (!CanFoldNonConst && !NewCT.isUndef() &&
       !isConstantOrConstantVector(NewCT, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCT))
     return SDValue();

-  SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1);
-  if (!NewCF.isUndef() &&
+  SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
+                          : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
+  if (!CanFoldNonConst && !NewCF.isUndef() &&
       !isConstantOrConstantVector(NewCF, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCF))
     return SDValue();
@@ -1936,6 +1958,84 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {

   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
 }
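
The CanFoldNonConst case works because 0 and -1 are the absorbing and identity elements of and/or, so the non-constant operand can be pushed into the select arms. A quick standalone check in plain C++ (not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X : {0u, 1u, 0x1234u, 0xFFFFFFFFu})
        for (bool Cond : {false, true}) {
          // and (select Cond, 0, -1), X --> select Cond, 0, X
          uint32_t Sel = Cond ? 0u : 0xFFFFFFFFu;
          assert((Sel & X) == (Cond ? 0u : X));
          // or X, (select Cond, -1, 0) --> select Cond, -1, X
          uint32_t Sel2 = Cond ? 0xFFFFFFFFu : 0u;
          assert((X | Sel2) == (Cond ? 0xFFFFFFFFu : X));
        }
      return 0;
    }
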
+static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
+  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+         "Expecting add or sub");
+
+  // Match a constant operand and a zext operand for the math instruction:
+  // add Z, C
+  // sub C, Z
+  bool IsAdd = N->getOpcode() == ISD::ADD;
+  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
+  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
+  auto *CN = dyn_cast<ConstantSDNode>(C);
+  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  // Match the zext operand as a setcc of a boolean.
+  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
+      Z.getOperand(0).getValueType() != MVT::i1)
+    return SDValue();
+
+  // Match the compare as: setcc (X & 1), 0, eq.
+  SDValue SetCC = Z.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
+      SetCC.getOperand(0).getOpcode() != ISD::AND ||
+      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+    return SDValue();
+
+  // We are adding/subtracting a constant and an inverted low bit. Turn that
+  // into a subtract/add of the low bit with incremented/decremented constant:
+  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
+  EVT VT = C.getValueType();
+  SDLoc DL(N);
+  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
+  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
+                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
+}
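
The masked-bool rewrite rests on the identity zext((X & 1) == 0) == 1 - (X & 1). A standalone sanity check in plain C++ (not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X = -4; X <= 4; ++X)
        for (int32_t C : {-7, 0, 5}) {
          int32_t Zext = ((X & 1) == 0) ? 1 : 0; // zext i1 (seteq (X & 1), 0)
          int32_t LowBit = X & 1;                // zext (X & 1)
          // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
          assert(Zext + C == (C + 1) - LowBit);
          // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
          assert(C - Zext == (C - 1) + LowBit);
        }
      return 0;
    }
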
+
+/// Try to fold a 'not' shifted sign-bit with add/sub with constant operand
+/// into a shift and add with a different constant.
+static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
+  assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
+         "Expecting add or sub");
+
+  // We need a constant operand for the add/sub, and the other operand is a
+  // logical shift right: add (srl), C or sub C, (srl).
+  bool IsAdd = N->getOpcode() == ISD::ADD;
+  SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
+  SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
+  ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
+  if (!C || ShiftOp.getOpcode() != ISD::SRL)
+    return SDValue();
+
+  // The shift must be of a 'not' value.
+  // TODO: Use isBitwiseNot() if it works with vectors.
+  SDValue Not = ShiftOp.getOperand(0);
+  if (!Not.hasOneUse() || Not.getOpcode() != ISD::XOR ||
+      !isAllOnesConstantOrAllOnesSplatConstant(Not.getOperand(1)))
+    return SDValue();
+
+  // The shift must be moving the sign bit to the least-significant-bit.
+  EVT VT = ShiftOp.getValueType();
+  SDValue ShAmt = ShiftOp.getOperand(1);
+  ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
+  if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+    return SDValue();
+
+  // Eliminate the 'not' by adjusting the shift and add/sub constant:
+  // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
+  // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
+  SDLoc DL(N);
+  auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
+  SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
+  APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
+  return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
+}
+
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2067,6 +2167,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
                        DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
   }

+  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+    return V;
+
+  if (SDValue V = foldAddSubOfSignBit(N, DAG))
+    return V;
+
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);

@@ -2075,6 +2181,11 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
       DAG.haveNoCommonBitsSet(N0, N1))
     return DAG.getNode(ISD::OR, DL, VT, N0, N1);

+  // fold (add (xor a, -1), 1) -> (sub 0, a)
+  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1))
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+                       N0.getOperand(0));
+
   if (SDValue Combined = visitADDLike(N0, N1, N))
     return Combined;

@@ -2210,6 +2321,38 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
   return SDValue();
 }

+static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
+                           SelectionDAG &DAG, const TargetLowering &TLI) {
+  SDValue Cst;
+  switch (TLI.getBooleanContents(VT)) {
+  case TargetLowering::ZeroOrOneBooleanContent:
+  case TargetLowering::UndefinedBooleanContent:
+    Cst = DAG.getConstant(1, DL, VT);
+    break;
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    Cst = DAG.getConstant(-1, DL, VT);
+    break;
+  }
+
+  return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
+}
+
+static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
+  if (V.getOpcode() != ISD::XOR) return false;
+  ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
+  if (!Const) return false;
+
+  switch(TLI.getBooleanContents(VT)) {
+  case TargetLowering::ZeroOrOneBooleanContent:
+    return Const->isOne();
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    return Const->isAllOnesValue();
+  case TargetLowering::UndefinedBooleanContent:
+    return (Const->getAPIntValue() & 0x01) == 1;
+  }
+  llvm_unreachable("Unsupported boolean content");
+}
+
 SDValue DAGCombiner::visitUADDO(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2240,6 +2383,15 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
                      DAG.getConstant(0, DL, CarryVT));

+  // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
+  if (isBitwiseNot(N0) && isOneConstantOrOneSplatConstant(N1)) {
+    SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+                              DAG.getConstant(0, DL, VT),
+                              N0.getOperand(0));
+    return CombineTo(N, Sub,
+                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
+  }
+
   if (SDValue Combined = visitUADDOLike(N0, N1, N))
     return Combined;
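
Both the visitADD and visitUADDO folds use the two's-complement identity ~a + 1 == -a; for uaddo, the carry out is additionally the exact inverse of usubo's borrow, which is why the combine flips the boolean. A standalone check in plain C++ (not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t A : {0u, 1u, 2u, 0x80000000u, 0xFFFFFFFFu}) {
        // fold (add (xor a, -1), 1) -> (sub 0, a)
        assert(~A + 1u == 0u - A);
        // uaddo(~a, 1) carries out only when ~a wraps, i.e. only for a == 0;
        // usubo(0, a) borrows for every a != 0. The carries are inverses.
        bool AddCarry = (~A == 0xFFFFFFFFu); // uaddo overflow condition
        bool SubBorrow = (A != 0u);          // usubo borrow condition
        assert(AddCarry == !SubBorrow);
      }
      return 0;
    }
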
@@ -2303,13 +2455,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);

   // fold (addcarry x, y, false) -> (uaddo x, y)
-  if (isNullConstant(CarryIn))
-    return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+  if (isNullConstant(CarryIn)) {
+    if (!LegalOperations ||
+        TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
+      return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
+  }
+
+  EVT CarryVT = CarryIn.getValueType();

   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
   if (isNullConstant(N0) && isNullConstant(N1)) {
     EVT VT = N0.getValueType();
-    EVT CarryVT = CarryIn.getValueType();
     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
     AddToWorklist(CarryExt.getNode());
     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2317,6 +2473,16 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
                      DAG.getConstant(0, DL, CarryVT));
   }

+  // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
+  if (isBitwiseNot(N0) && isNullConstant(N1) &&
+      isBooleanFlip(CarryIn, CarryVT, TLI)) {
+    SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
+                              DAG.getConstant(0, DL, N0.getValueType()),
+                              N0.getOperand(0), CarryIn.getOperand(0));
+    return CombineTo(N, Sub,
+                     flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
+  }
+
   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
     return Combined;

@@ -2458,6 +2624,11 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
   if (isAllOnesConstantOrAllOnesSplatConstant(N0))
     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);

+  // fold (A - (0-B)) -> A+B
+  if (N1.getOpcode() == ISD::SUB &&
+      isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+    return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
+
   // fold A-(A-B) -> B
   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
     return N1.getOperand(1);
@@ -2500,12 +2671,50 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
                        N0.getOperand(1).getOperand(0));

+  // fold (X - (-Y * Z)) -> (X + (Y * Z))
+  if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
+    if (N1.getOperand(0).getOpcode() == ISD::SUB &&
+        isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0))) {
+      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+                                N1.getOperand(0).getOperand(1),
+                                N1.getOperand(1));
+      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+    }
+    if (N1.getOperand(1).getOpcode() == ISD::SUB &&
+        isNullConstantOrNullSplatConstant(N1.getOperand(1).getOperand(0))) {
+      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
+                                N1.getOperand(0),
+                                N1.getOperand(1).getOperand(1));
+      return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
+    }
+  }
+
   // If either operand of a sub is undef, the result is undef
   if (N0.isUndef())
     return N0;
   if (N1.isUndef())
     return N1;

+  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+    return V;
+
+  if (SDValue V = foldAddSubOfSignBit(N, DAG))
+    return V;
+
+  // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
+  if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
+    if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
+      SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
+      SDValue S0 = N1.getOperand(0);
+      if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
+        unsigned OpSizeInBits = VT.getScalarSizeInBits();
+        if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
+          if (C->getAPIntValue() == (OpSizeInBits - 1))
+            return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
+      }
+    }
+  }
+
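
The abs pattern just above is the classic branch-free absolute value: with Y = X >> 31 (arithmetic shift), (X ^ Y) - Y == |X|. A standalone check in plain C++ (not from the patch; it assumes the usual arithmetic right shift on negative values):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    int main() {
      for (int32_t X : {0, 1, -1, 42, -42, INT32_MAX, INT32_MIN + 1}) {
        int32_t Y = X >> 31; // sra(X, size(X)-1): 0 for X >= 0, -1 for X < 0
        assert(((X ^ Y) - Y) == std::abs(X));
      }
      return 0;
    }
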
   // If the relocation model supports it, consider symbol offsets.
   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
@@ -2612,8 +2821,11 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
   SDValue CarryIn = N->getOperand(2);

   // fold (subcarry x, y, false) -> (usubo x, y)
-  if (isNullConstant(CarryIn))
-    return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+  if (isNullConstant(CarryIn)) {
+    if (!LegalOperations ||
+        TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
+      return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
+  }

   return SDValue();
 }
@@ -2689,11 +2901,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
     SDLoc DL(N);
     SDValue LogBase2 = BuildLogBase2(N1, DL);
-    AddToWorklist(LogBase2.getNode());
-
     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
-    AddToWorklist(Trunc.getNode());
     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
   }
   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
@@ -2816,9 +3025,10 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
   SDValue Op1 = Node->getOperand(1);
   SDValue combined;
   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
-    UE = Op0.getNode()->use_end(); UI != UE;) {
-    SDNode *User = *UI++;
-    if (User == Node || User->use_empty())
+    UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
+    SDNode *User = *UI;
+    if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
+        User->use_empty())
       continue;
     // Convert the other matching node(s), too;
     // otherwise, the DIVREM may get target-legalized into something
@@ -2868,6 +3078,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(VT);

   // fold vector ops
   if (VT.isVector())
@@ -2887,6 +3098,11 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
   // fold (sdiv X, -1) -> 0-X
   if (N1C && N1C->isAllOnesValue())
     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
+  // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
+  if (N1C && N1C->getAPIntValue().isMinSignedValue())
+    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+                         DAG.getConstant(1, DL, VT),
+                         DAG.getConstant(0, DL, VT));

   if (SDValue V = simplifyDivRem(N, DAG))
     return V;
@@ -2899,45 +3115,90 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);

+  if (SDValue V = visitSDIVLike(N0, N1, N))
+    return V;
+
+  // sdiv, srem -> sdivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+  // true. Otherwise, we break the simplification logic in visitREM().
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+    if (SDValue DivRem = useDivRem(N))
+      return DivRem;
+
+  return SDValue();
+}
+
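
The new (sdiv X, MIN_SIGNED) fold relies on the fact that the only dividend whose magnitude reaches |INT_MIN| is INT_MIN itself, so the truncating quotient is 1 there and 0 everywhere else. A standalone check in plain C++ (not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X : {INT32_MIN, INT32_MIN + 1, -1, 0, 1, INT32_MAX}) {
        // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
        assert(X / INT32_MIN == (X == INT32_MIN ? 1 : 0));
      }
      return 0;
    }
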
+SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(VT);
+  unsigned BitWidth = VT.getScalarSizeInBits();
+
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
+  // Helper for determining whether a value is a power-2 constant scalar or a
+  // vector of such elements.
+  auto IsPowerOfTwo = [](ConstantSDNode *C) {
+    if (C->isNullValue() || C->isOpaque())
+      return false;
+    if (C->getAPIntValue().isPowerOf2())
+      return true;
+    if ((-C->getAPIntValue()).isPowerOf2())
+      return true;
+    return false;
+  };
+
   // fold (sdiv X, pow2) -> simple ops after legalize
   // FIXME: We check for the exact bit here because the generic lowering gives
   // better results in that case. The target-specific lowering should learn how
   // to handle exact sdivs efficiently.
-  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
-      !N->getFlags().hasExact() && (N1C->getAPIntValue().isPowerOf2() ||
-                                    (-N1C->getAPIntValue()).isPowerOf2())) {
+  if (!N->getFlags().hasExact() &&
+      ISD::matchUnaryPredicate(N1C ? SDValue(N1C, 0) : N1, IsPowerOfTwo)) {
     // Target-specific implementation of sdiv x, pow2.
     if (SDValue Res = BuildSDIVPow2(N))
       return Res;

-    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
+    // Create constants that are functions of the shift amount value.
+    EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+    SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
+    SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
+    C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
+    SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
+    if (!isConstantOrConstantVector(Inexact))
+      return SDValue();

     // Splat the sign bit into the register
-    SDValue SGN =
-        DAG.getNode(ISD::SRA, DL, VT, N0,
-                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
-                                    getShiftAmountTy(N0.getValueType())));
-    AddToWorklist(SGN.getNode());
+    SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
+                               DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
+    AddToWorklist(Sign.getNode());

     // Add (N0 < 0) ? abs2 - 1 : 0;
-    SDValue SRL =
-        DAG.getNode(ISD::SRL, DL, VT, SGN,
-                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
-                                    getShiftAmountTy(SGN.getValueType())));
-    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
-    AddToWorklist(SRL.getNode());
-    AddToWorklist(ADD.getNode());
     // Divide by pow2
-    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
-                  DAG.getConstant(lg2, DL,
-                                  getShiftAmountTy(ADD.getValueType())));
-
-    // If we're dividing by a positive value, we're done.  Otherwise, we must
-    // negate the result.
-    if (N1C->getAPIntValue().isNonNegative())
-      return SRA;
-
-    AddToWorklist(SRA.getNode());
-    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
+    SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
+    AddToWorklist(Srl.getNode());
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
+    AddToWorklist(Add.getNode());
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
+    AddToWorklist(Sra.getNode());
+
+    // Special case: (sdiv X, 1) -> X
+    // Special case: (sdiv X, -1) -> 0-X
+    SDValue One = DAG.getConstant(1, DL, VT);
+    SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
+    SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
+    SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
+    SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
+    Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
+
+    // If dividing by a positive value, we're done. Otherwise, the result must
+    // be negated.
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
+
+    // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
+    SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
+    SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
+    return Res;
   }

   // If integer divide is expensive and we satisfy the requirements, emit an
@@ -2948,13 +3209,6 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
   if (SDValue Op = BuildSDIV(N))
     return Op;

-  // sdiv, srem -> sdivrem
-  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
-  // true. Otherwise, we break the simplification logic in visitREM().
-  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
-    if (SDValue DivRem = useDivRem(N))
-      return DivRem;
-
   return SDValue();
 }

@@ -2962,6 +3216,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(VT);

   // fold vector ops
   if (VT.isVector())
@@ -2977,6 +3232,14 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
   if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, N0C, N1C))
     return Folded;
+  // fold (udiv X, 1) -> X
+  if (N1C && N1C->isOne())
+    return N0;
+  // fold (udiv X, -1) -> select(X == -1, 1, 0)
+  if (N1C && N1C->getAPIntValue().isAllOnesValue())
+    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+                         DAG.getConstant(1, DL, VT),
+                         DAG.getConstant(0, DL, VT));

   if (SDValue V = simplifyDivRem(N, DAG))
     return V;
@@ -2984,6 +3247,26 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;

+  if (SDValue V = visitUDIVLike(N0, N1, N))
+    return V;
+
+  // udiv, urem -> udivrem
+  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+  // true. Otherwise, we break the simplification logic in visitREM().
+  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
+    if (SDValue DivRem = useDivRem(N))
+      return DivRem;
+
+  return SDValue();
+}
+
+SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
   // fold (udiv x, (1 << c)) -> x >>u c
   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
       DAG.isKnownToBeAPowerOfTwo(N1)) {
@@ -3019,13 +3302,6 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
   if (SDValue Op = BuildUDIV(N))
     return Op;

-  // sdiv, srem -> sdivrem
-  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
-  // true. Otherwise, we break the simplification logic in visitREM().
-  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
-    if (SDValue DivRem = useDivRem(N))
-      return DivRem;
-
   return SDValue();
 }

@@ -3035,6 +3311,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
+  EVT CCVT = getSetCCResultType(VT);
+
   bool isSigned = (Opcode == ISD::SREM);
   SDLoc DL(N);
@@ -3044,6 +3322,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
   if (N0C && N1C)
     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
       return Folded;
+  // fold (urem X, -1) -> select(X == -1, 0, x)
+  if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
+    return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
+                         DAG.getConstant(0, DL, VT), N0);

   if (SDValue V = simplifyDivRem(N, DAG))
     return V;
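
The power-of-two path in visitSDIVLike computes a truncating signed division purely with shifts: splat the sign, add a bias of divisor-1 for negative dividends, then shift right arithmetically. A standalone check in plain C++ (not from the patch; it assumes arithmetic right shift of negative values):

    #include <cassert>
    #include <cstdint>

    // Truncating signed division by 1 << K, expanded as in the combine:
    // sign = x >> 31; bias = sign >>u (32 - K); q = (x + bias) >> K.
    static int32_t sdivPow2(int32_t X, unsigned K) {
      int32_t Sign = X >> 31;                             // SRA: 0 or -1
      int32_t Bias = int32_t(uint32_t(Sign) >> (32 - K)); // SRL: 2^K - 1 if X < 0
      return (X + Bias) >> K;                             // SRA: rounds toward 0
    }

    int main() {
      for (int32_t X : {-1025, -17, -1, 0, 1, 17, 1025})
        for (unsigned K : {1u, 4u, 10u})
          assert(sdivPow2(X, K) == X / (int32_t(1) << K));
      return 0;
    }
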
@@ -3077,22 +3359,19 @@ SDValue DAGCombiner::visitREM(SDNode *N) {

   // If X/C can be simplified by the division-by-constant logic, lower
   // X%C to the equivalent of X-X/C*C.
-  // To avoid mangling nodes, this simplification requires that the combine()
-  // call for the speculative DIV must not cause a DIVREM conversion. We guard
-  // against this by skipping the simplification if isIntDivCheap(). When
-  // div is not cheap, combine will not return a DIVREM. Regardless,
-  // checking cheapness here makes sense since the simplification results in
-  // fatter code.
-  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
-    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
-    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
-    AddToWorklist(Div.getNode());
-    SDValue OptimizedDiv = combine(Div.getNode());
-    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
-      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
-             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
+  // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
+  // speculative DIV must not cause a DIVREM conversion. We guard against this
+  // by skipping the simplification if isIntDivCheap(). When div is not cheap,
+  // combine will not return a DIVREM. Regardless, checking cheapness here
+  // makes sense since the simplification results in fatter code.
+  if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
+    SDValue OptimizedDiv =
+        isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
+    if (OptimizedDiv.getNode() && OptimizedDiv.getOpcode() != ISD::UDIVREM &&
+        OptimizedDiv.getOpcode() != ISD::SDIVREM) {
       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
+      AddToWorklist(OptimizedDiv.getNode());
       AddToWorklist(Mul.getNode());
       return Sub;
     }
   }
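
This rewrite leans on the definition of truncating remainder: X % C == X - (X / C) * C, so any cheap expansion of the division yields a cheap remainder. A standalone check in plain C++ (not from the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X : {-100, -7, 0, 7, 100})
        for (int32_t C : {-9, -2, 3, 16}) {
          // Holds for any divisor known never to be zero.
          assert(X % C == X - (X / C) * C);
        }
      return 0;
    }
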
@@ -3350,6 +3629,25 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

+  // If sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
+  // Only do this if the current op isn't legal and the flipped is.
+  unsigned Opcode = N->getOpcode();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isOperationLegal(Opcode, VT) &&
+      (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
+      (N1.isUndef() || DAG.SignBitIsZero(N1))) {
+    unsigned AltOpcode;
+    switch (Opcode) {
+    case ISD::SMIN: AltOpcode = ISD::UMIN; break;
+    case ISD::SMAX: AltOpcode = ISD::UMAX; break;
+    case ISD::UMIN: AltOpcode = ISD::SMIN; break;
+    case ISD::UMAX: AltOpcode = ISD::SMAX; break;
+    default: llvm_unreachable("Unknown MINMAX opcode");
+    }
+    if (TLI.isOperationLegal(AltOpcode, VT))
+      return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
+  }
+
   return SDValue();
 }

@@ -3469,9 +3767,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
       ShOp = SDValue();
   }

-  // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
-  // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
-  // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
+  // (AND (shuf (A, C), shuf (B, C))) -> shuf (AND (A, B), C)
+  // (OR  (shuf (A, C), shuf (B, C))) -> shuf (OR  (A, B), C)
+  // (XOR (shuf (A, C), shuf (B, C))) -> shuf (XOR (A, B), V_0)
   if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
     SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                   N0->getOperand(0), N1->getOperand(0));
@@ -3490,9 +3788,9 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
       ShOp = SDValue();
   }

-  // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
-  // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
-  // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
+  // (AND (shuf (C, A), shuf (C, B))) -> shuf (C, AND (A, B))
+  // (OR  (shuf (C, A), shuf (C, B))) -> shuf (C, OR  (A, B))
+  // (XOR (shuf (C, A), shuf (C, B))) -> shuf (V_0, XOR (A, B))
   if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
     SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                   N0->getOperand(1), N1->getOperand(1));
@@ -3525,7 +3823,7 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
   // operations on the left and right operands, so those types must match.
   EVT VT = N0.getValueType();
   EVT OpVT = LL.getValueType();
-  if (LegalOperations || VT != MVT::i1)
+  if (LegalOperations || VT.getScalarType() != MVT::i1)
     if (VT != getSetCCResultType(OpVT))
       return SDValue();
   if (OpVT != RL.getValueType())
@@ -3762,53 +4060,78 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
   return true;
 }
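
Flipping between signed and unsigned min/max is sound because the two orders agree whenever both operands have a clear sign bit. A standalone check in plain C++ (not from the patch):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      // With both sign bits known zero, signed and unsigned orders coincide.
      for (int32_t A : {0, 1, 77, INT32_MAX})
        for (int32_t B : {0, 2, 1000, INT32_MAX}) {
          assert(std::min(A, B) == int32_t(std::min(uint32_t(A), uint32_t(B))));
          assert(std::max(A, B) == int32_t(std::max(uint32_t(A), uint32_t(B))));
        }
      return 0;
    }
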
Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the - // transformation is not equivalent, and the downstream logic to replace - // uses gets things wrong. - if (LoadN->getNumValues() > 2) + if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits()) return false; - // If the load that we're shrinking is an extload and we're not just - // discarding the extension we can't simply shrink the load. Bail. - // TODO: It would be possible to merge the extensions in some cases. - if (LoadN->getExtensionType() != ISD::NON_EXTLOAD && - LoadN->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) - return false; - - if (!TLI.shouldReduceLoadWidth(LoadN, ExtType, ExtVT)) + // Ensure that this isn't going to produce an unsupported unaligned access. + if (ShAmt && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + LDST->getAddressSpace(), ShAmt / 8)) return false; // It's not possible to generate a constant of extended or untyped type. - EVT PtrType = LoadN->getOperand(1).getValueType(); + EVT PtrType = LDST->getBasePtr().getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) return false; + if (isa<LoadSDNode>(LDST)) { + LoadSDNode *Load = cast<LoadSDNode>(LDST); + // Don't transform one with multiple uses, this would require adding a new + // load. + if (!SDValue(Load, 0).hasOneUse()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT)) + return false; + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. + if (Load->getNumValues() > 2) + return false; + + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. + if (Load->getExtensionType() != ISD::NON_EXTLOAD && + Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt) + return false; + + if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT)) + return false; + } else { + assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode"); + StoreSDNode *Store = cast<StoreSDNode>(LDST); + // Can't write outside the original store + if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt) + return false; + + if (LegalOperations && + !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT)) + return false; + } return true; } @@ -3841,16 +4164,22 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, auto *Load = cast<LoadSDNode>(Op); EVT ExtVT; if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) && - isLegalNarrowLoad(Load, ISD::ZEXTLOAD, ExtVT)) { - // Only add this load if we can make it more narrow. - if (ExtVT.bitsLT(Load->getMemoryVT())) + isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) { + + // ZEXTLOAD is already small enough. + if (Load->getExtensionType() == ISD::ZEXTLOAD && + ExtVT.bitsGE(Load->getMemoryVT())) + continue; + + // Use LE to convert equal sized loads to zext. 
+ if (ExtVT.bitsLE(Load->getMemoryVT())) Loads.insert(Load); + continue; } return false; } case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: case ISD::AssertZext: { unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes(); EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); @@ -3876,7 +4205,23 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N, // Allow one node which will be masked along with any loads found. if (NodeToMask) return false; + + // Also ensure that the node to be masked only produces one data result. NodeToMask = Op.getNode(); + if (NodeToMask->getNumValues() > 1) { + bool HasValue = false; + for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) { + MVT VT = SDValue(NodeToMask, i).getSimpleValueType(); + if (VT != MVT::Glue && VT != MVT::Other) { + if (HasValue) { + NodeToMask = nullptr; + return false; + } + HasValue = true; + } + } + assert(HasValue && "Node to be masked has no data result?"); + } } return true; } @@ -3900,33 +4245,44 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { if (Loads.size() == 0) return false; + LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump()); SDValue MaskOp = N->getOperand(1); // If it exists, fixup the single node we allow in the tree that needs // masking. if (FixupNode) { + LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump()); SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode), FixupNode->getValueType(0), SDValue(FixupNode, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And); - DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), - MaskOp); + if (And.getOpcode() == ISD::AND) + DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp); } // Narrow any constants that need it. for (auto *LogicN : NodesWithConsts) { - auto *C = cast<ConstantSDNode>(LogicN->getOperand(1)); - SDValue And = DAG.getNode(ISD::AND, SDLoc(C), C->getValueType(0), - SDValue(C, 0), MaskOp); - DAG.UpdateNodeOperands(LogicN, LogicN->getOperand(0), And); + SDValue Op0 = LogicN->getOperand(0); + SDValue Op1 = LogicN->getOperand(1); + + if (isa<ConstantSDNode>(Op0)) + std::swap(Op0, Op1); + + SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), + Op1, MaskOp); + + DAG.UpdateNodeOperands(LogicN, Op0, And); } // Create narrow loads. for (auto *Load : Loads) { + LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump()); SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0), SDValue(Load, 0), MaskOp); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And); - DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp); + if (And.getOpcode() == ISD::AND) + And = SDValue( + DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0); SDValue NewLoad = ReduceLoadWidth(And.getNode()); assert(NewLoad && "Shouldn't be masking the load if it can't be narrowed"); @@ -3938,6 +4294,60 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) { return false; } +// Unfold +// x & (-1 'logical shift' y) +// To +// (x 'opposite logical shift' y) 'logical shift' y +// if it is better for performance. +SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) { + assert(N->getOpcode() == ISD::AND); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Do we actually prefer shifts over a mask? + if (!TLI.preferShiftsToClearExtremeBits(N0)) + return SDValue(); + + // Try to match (-1 '[outer] logical shift' y) + unsigned OuterShift; + unsigned InnerShift; // The opposite direction to the OuterShift.
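Before the matcher details, the identities being installed here are easy to check standalone (plain C++, 32-bit only; helper names are illustrative):

  #include <cassert>
  #include <cstdint>

  // x & (-1 << y) clears the low y bits; so does (x >> y) << y.
  static uint32_t clearLowMask(uint32_t x, unsigned y)  { return x & (~0u << y); }
  static uint32_t clearLowShift(uint32_t x, unsigned y) { return (x >> y) << y; }
  // x & (-1 >> y) clears the high y bits; so does (x << y) >> y.
  static uint32_t clearHighMask(uint32_t x, unsigned y)  { return x & (~0u >> y); }
  static uint32_t clearHighShift(uint32_t x, unsigned y) { return (x << y) >> y; }

  int main() {
    for (unsigned y = 0; y < 32; ++y)
      for (uint32_t x : {0u, 1u, 0x80000000u, 0xDEADBEEFu}) {
        assert(clearLowMask(x, y) == clearLowShift(x, y));
        assert(clearHighMask(x, y) == clearHighShift(x, y));
      }
    return 0;
  }

The declarations and the matchMask lambda below implement exactly this matching of the (-1 'shift' y) mask form.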
+ SDValue Y; // Shift amount. + auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool { + if (!M.hasOneUse()) + return false; + OuterShift = M->getOpcode(); + if (OuterShift == ISD::SHL) + InnerShift = ISD::SRL; + else if (OuterShift == ISD::SRL) + InnerShift = ISD::SHL; + else + return false; + if (!isAllOnesConstant(M->getOperand(0))) + return false; + Y = M->getOperand(1); + return true; + }; + + SDValue X; + if (matchMask(N1)) + X = N0; + else if (matchMask(N0)) + X = N1; + else + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + + // tmp = x 'opposite logical shift' y + SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y); + // ret = tmp 'logical shift' y + SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y); + + return T1; +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4004,7 +4414,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); }; if (N0.getOpcode() == ISD::OR && - matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset)) + ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset)) return N1; // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { @@ -4235,6 +4645,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return BSwap; } + if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N)) + return Shifts; + return SDValue(); } @@ -4261,7 +4674,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (!N0.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!N01C || N01C->getZExtValue() != 0xFF00) + // Also handle 0xffff since the LHS is guaranteed to have zeros there. + // This is needed for X86. + if (!N01C || (N01C->getZExtValue() != 0xFF00 && + N01C->getZExtValue() != 0xFFFF)) return SDValue(); N0 = N0.getOperand(0); LookPassAnd0 = true; @@ -4308,7 +4724,10 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, if (!N10.getNode()->hasOneUse()) return SDValue(); ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); - if (!N101C || N101C->getZExtValue() != 0xFF00) + // Also allow 0xFFFF since the bits will be shifted out. This is needed + // for X86. + if (!N101C || (N101C->getZExtValue() != 0xFF00 && + N101C->getZExtValue() != 0xFFFF)) return SDValue(); N10 = N10.getOperand(0); LookPassAnd1 = true; @@ -4379,6 +4798,14 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { return false; case 0xFF: MaskByteOffset = 0; break; case 0xFF00: MaskByteOffset = 1; break; + case 0xFFFF: + // In case demanded bits didn't clear the bits that will be shifted out. + // This is needed for X86. 
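For orientation, the shape MatchBSwapHWordLow and isBSwapHWordElement assemble is the 16-bit byteswap sitting in the low half of a wider value; the relaxed 0xFFFF masks are accepted exactly where, per the comments, the extra byte is already known zero or is about to be shifted out. A standalone check of the core pattern (__builtin_bswap16 is a GCC/Clang builtin):

  #include <cassert>
  #include <cstdint>

  // The low-half byteswap shape being matched, in plain C++.
  static uint32_t bswapHWordLow(uint32_t x) {
    return ((x & 0x00FFu) << 8) | ((x & 0xFF00u) >> 8);
  }

  int main() {
    for (uint32_t x : {0x0000u, 0x1234u, 0xABCDu, 0xFFFFu})
      assert(bswapHWordLow(x) == __builtin_bswap16(static_cast<uint16_t>(x)));
    return 0;
  }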
+ if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) { + MaskByteOffset = 1; + break; + } + return false; case 0xFF0000: MaskByteOffset = 2; break; case 0xFF000000: MaskByteOffset = 3; break; } @@ -4693,7 +5120,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return LHS->getAPIntValue().intersects(RHS->getAPIntValue()); }; if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && - matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { + ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect)) { if (SDValue COR = DAG.FoldConstantArithmetic( ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) { SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); @@ -4749,7 +5176,8 @@ bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. -static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, + SelectionDAG &DAG) { // If EltSize is a power of 2 then: // // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) @@ -4784,9 +5212,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { unsigned MaskLoBits = 0; if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { - if (NegC->getAPIntValue() == EltSize - 1) { + KnownBits Known; + DAG.computeKnownBits(Neg.getOperand(0), Known); + unsigned Bits = Log2_64(EltSize); + if (NegC->getAPIntValue().getActiveBits() <= Bits && + ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) { Neg = Neg.getOperand(0); - MaskLoBits = Log2_64(EltSize); + MaskLoBits = Bits; } } } @@ -4801,10 +5233,16 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with // Pos'. The truncation is redundant for the purpose of the equality. - if (MaskLoBits && Pos.getOpcode() == ISD::AND) - if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) - if (PosC->getAPIntValue() == EltSize - 1) + if (MaskLoBits && Pos.getOpcode() == ISD::AND) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) { + KnownBits Known; + DAG.computeKnownBits(Pos.getOperand(0), Known); + if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits && + ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >= + MaskLoBits)) Pos = Pos.getOperand(0); + } + } // The condition we need is now: // @@ -4860,7 +5298,7 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg).getNode(); @@ -4878,8 +5316,8 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (!TLI.isTypeLegal(VT)) return nullptr; // The target must have at least one rotate flavor. 
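The justification for matchRotateSub is the standard identity that a left-rotate by y equals a right-rotate by (EltSize - y) mod EltSize; the KnownBits change above merely admits AND masks that clear only bits already known to be zero within the low log2(EltSize) bits, rather than requiring the mask to be exactly EltSize - 1. A quick check of the identity itself (illustrative helpers, 32-bit):

  #include <cassert>
  #include <cstdint>

  static uint32_t rotl32(uint32_t x, unsigned r) {
    r &= 31;
    return (x << r) | (x >> ((32 - r) & 31));
  }
  static uint32_t rotr32(uint32_t x, unsigned r) {
    r &= 31;
    return (x >> r) | (x << ((32 - r) & 31));
  }

  int main() {
    for (unsigned y = 0; y < 32; ++y)
      for (uint32_t x : {1u, 0x80000000u, 0xDEADBEEFu})
        assert(rotl32(x, y) == rotr32(x, (32 - y) & 31));
    return 0;
  }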
- bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); - bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + bool HasROTL = hasOperation(ISD::ROTL, VT); + bool HasROTR = hasOperation(ISD::ROTR, VT); if (!HasROTL && !HasROTR) return nullptr; // Check for truncated rotate. @@ -4928,7 +5366,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { ConstantSDNode *RHS) { return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; }; - if (matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { + if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); @@ -5185,7 +5623,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { Optional<BaseIndexOffset> Base; SDValue Chain; - SmallSet<LoadSDNode *, 8> Loads; + SmallPtrSet<LoadSDNode *, 8> Loads; Optional<ByteProvider> FirstByteProvider; int64_t FirstOffset = INT64_MAX; @@ -5210,7 +5648,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); // Loads must share the same base address - BaseIndexOffset Ptr = BaseIndexOffset::match(L->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); int64_t ByteOffsetFromBase = 0; if (!Base) Base = Ptr; @@ -5284,6 +5722,88 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad; } +// If the target has andn, bsl, or a similar bit-select instruction, +// we want to unfold the masked merge, whose canonical pattern is: +// ((x ^ y) & m) ^ y +// Into: +// (x & m) | (y & ~m) +// If y is a constant, and the 'andn' does not work with immediates, +// we unfold into a different pattern: +// ~(~x & m) & (m | y) +// NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at +// the very least that breaks andnpd / andnps patterns, and because those +// patterns are simplified in IR and shouldn't be created in the DAG. +SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { + assert(N->getOpcode() == ISD::XOR); + + // Don't touch 'not' (i.e. where y = -1). + if (isAllOnesConstantOrAllOnesSplatConstant(N->getOperand(1))) + return SDValue(); + + EVT VT = N->getValueType(0); + + // There are 3 commutable operators in the pattern, + // so we have to deal with 8 possible variants of the basic pattern. + SDValue X, Y, M; + auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) { + if (And.getOpcode() != ISD::AND || !And.hasOneUse()) + return false; + SDValue Xor = And.getOperand(XorIdx); + if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse()) + return false; + SDValue Xor0 = Xor.getOperand(0); + SDValue Xor1 = Xor.getOperand(1); + // Don't touch 'not' (i.e. where y = -1). + if (isAllOnesConstantOrAllOnesSplatConstant(Xor1)) + return false; + if (Other == Xor0) + std::swap(Xor0, Xor1); + if (Other != Xor1) + return false; + X = Xor0; + Y = Xor1; + M = And.getOperand(XorIdx ? 0 : 1); + return true; + }; + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) && + !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0)) + return SDValue(); + + // Don't do anything if the mask is constant. This should not be reachable. + // InstCombine should have already unfolded this pattern, and DAGCombiner + // probably shouldn't produce it, either.
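All three forms below pick x where the mask m is set and y where it is clear, which is the identity this unfold and its immediate-friendly variant rely on; a standalone spot check (names are illustrative):

  #include <cassert>
  #include <cstdint>

  static uint32_t mergeXorForm(uint32_t x, uint32_t y, uint32_t m) {
    return ((x ^ y) & m) ^ y;        // the pattern being matched
  }
  static uint32_t mergeAndnForm(uint32_t x, uint32_t y, uint32_t m) {
    return (x & m) | (y & ~m);       // the generic unfolded form
  }
  static uint32_t mergeImmForm(uint32_t x, uint32_t y, uint32_t m) {
    return ~(~x & m) & (m | y);      // variant used when y is an immediate
  }

  int main() {
    const uint32_t vals[] = {0u, 1u, 0xF0F0F0F0u, 0xDEADBEEFu, ~0u};
    for (uint32_t x : vals)
      for (uint32_t y : vals)
        for (uint32_t m : vals) {
          assert(mergeXorForm(x, y, m) == mergeAndnForm(x, y, m));
          assert(mergeXorForm(x, y, m) == mergeImmForm(x, y, m));
        }
    return 0;
  }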
+ if (isa<ConstantSDNode>(M.getNode())) + return SDValue(); + + // We can transform if the target has AndNot + if (!TLI.hasAndNot(M)) + return SDValue(); + + SDLoc DL(N); + + // If Y is a constant, check that 'andn' works with immediates. + if (!TLI.hasAndNot(Y)) { + assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); + // If not, we need to do a bit more work to make sure andn is still used. + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M); + SDValue NotLHS = DAG.getNOT(DL, LHS, VT); + SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y); + return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); + } + + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); + SDValue NotM = DAG.getNOT(DL, M, VT); + SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); + + return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); +} + SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5363,7 +5883,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc - if (isOneConstant(N1) && VT == MVT::i1 && + if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { @@ -5375,7 +5895,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants - if (isAllOnesConstant(N1) && + if (isAllOnesConstant(N1) && N0.hasOneUse() && (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { @@ -5396,13 +5916,19 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) - unsigned OpSizeInBits = VT.getScalarSizeInBits(); - if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 && - N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0) && - TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { - if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1))) - if (C->getAPIntValue() == (OpSizeInBits - 1)) - return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0.getOperand(0)); + if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { + SDValue A = N0.getOpcode() == ISD::ADD ? N0 : N1; + SDValue S = N0.getOpcode() == ISD::SRA ? N0 : N1; + if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) { + SDValue A0 = A.getOperand(0), A1 = A.getOperand(1); + SDValue S0 = S.getOperand(0); + if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) { + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1))) + if (C->getAPIntValue() == (OpSizeInBits - 1)) + return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0); + } + } } // fold (xor x, x) -> 0 @@ -5439,6 +5965,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) return Tmp; + // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable + if (SDValue MM = unfoldMaskedMerge(N)) + return MM; + // Simplify the expression using non-local knowledge. 
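The abs fold a few lines up recognizes the classic branchless absolute value; in plain C++ (assuming an arithmetic right shift on int32_t, which C++20 guarantees and mainstream compilers provide, and excluding INT32_MIN, where the addition would overflow):

  #include <cassert>
  #include <cstdint>

  static int32_t absViaSraXor(int32_t x) {
    int32_t s = x >> 31;             // y = sra(x, 31): 0 or -1
    return (x + s) ^ s;              // xor(add(x, y), y)
  }

  int main() {
    for (int32_t x : {-100, -1, 0, 1, 100, 2147483647})
      assert(absViaSraXor(x) == (x < 0 ? -x : x));
    return 0;
  }

The SimplifyDemandedBits call that follows implements the non-local simplification the preceding comment refers to.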
if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); @@ -5641,7 +6171,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { return Val->getAPIntValue().uge(OpSizeInBits); }; - if (matchUnaryPredicate(N1, MatchShiftTooBig)) + if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (shl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5676,7 +6206,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).uge(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) return DAG.getConstant(0, SDLoc(N), VT); auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, @@ -5686,7 +6216,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).ult(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); EVT ShiftVT = N1.getValueType(); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); @@ -5862,7 +6392,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { return Val->getAPIntValue().uge(OpSizeInBits); }; - if (matchUnaryPredicate(N1, MatchShiftTooBig)) + if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (sra x, 0) -> x if (N1C && N1C->isNullValue()) @@ -5897,7 +6427,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).uge(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), DAG.getConstant(OpSizeInBits - 1, DL, ShiftVT)); @@ -5908,7 +6438,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).ult(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1)); return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), Sum); } @@ -6026,7 +6556,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { auto MatchShiftTooBig = [OpSizeInBits](ConstantSDNode *Val) { return Val->getAPIntValue().uge(OpSizeInBits); }; - if (matchUnaryPredicate(N1, MatchShiftTooBig)) + if (ISD::matchUnaryPredicate(N1, MatchShiftTooBig)) return DAG.getUNDEF(VT); // fold (srl x, 0) -> x if (N1C && N1C->isNullValue()) @@ -6049,7 +6579,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).uge(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange)) return DAG.getConstant(0, SDLoc(N), VT); auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS, @@ -6059,7 +6589,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */); return (c1 + c2).ult(OpSizeInBits); }; - if (matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) { SDLoc DL(N); EVT ShiftVT = N1.getValueType(); SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, 
N0.getOperand(1)); @@ -6270,6 +6800,13 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { // fold (ctlz c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); + + // If the value is known never to be zero, switch to the undef version. + if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) { + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); + } + return SDValue(); } @@ -6290,6 +6827,13 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { // fold (cttz c1) -> c2 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + + // If the value is known never to be zero, switch to the undef version. + if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) { + if (DAG.isKnownNeverZero(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + } + return SDValue(); } @@ -6313,7 +6857,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { return SDValue(); } -/// \brief Generate Min/Max node +/// Generate Min/Max node static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode CC, const TargetLowering &TLI, @@ -6428,9 +6972,9 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // in another basic block or it could require searching a complicated // expression. if (CondVT.isInteger() && - TLI.getBooleanContents(false, true) == + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == TargetLowering::ZeroOrOneBooleanContent && - TLI.getBooleanContents(false, false) == + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == TargetLowering::ZeroOrOneBooleanContent && C1->isNullValue() && C2->isOne()) { SDValue NotCond = @@ -6559,15 +7103,10 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } - // select (xor Cond, 1), X, Y -> select Cond, Y, X if (VT0 == MVT::i1) { - if (N0->getOpcode() == ISD::XOR) { - if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) { - SDValue Cond0 = N0->getOperand(0); - if (C->isOne()) - return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N2, N1); - } - } + // select (not Cond), N1, N2 -> select Cond, N2, N1 + if (isBitwiseNot(N0)) + return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1); } // fold selects based on a setcc into other things, such as min/max/abs @@ -6711,6 +7250,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { SDValue DataLo, DataHi; std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + SDValue Scale = MSC->getScale(); SDValue BasePtr = MSC->getBasePtr(); SDValue IndexLo, IndexHi; std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); @@ -6720,11 +7260,11 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, MSC->getAAInfo(), MSC->getRanges()); - SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; + SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale }; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); - SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; + SDValue OpsHi[] = { Chain, DataHi, MaskHi, BasePtr, IndexHi, Scale }; Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); @@ -6785,12 +7325,12 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, MST->isCompressingStore()); + unsigned HiOffset = LoMemVT.getStoreSize(); - MMO = 
DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), - SecondHalfAlignment, MST->getAAInfo(), - MST->getRanges()); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MST->getPointerInfo().getWithOffset(HiOffset), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment, + MST->getAAInfo(), MST->getRanges()); Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, MST->isTruncatingStore(), @@ -6844,6 +7384,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + SDValue Scale = MGT->getScale(); SDValue BasePtr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); SDValue IndexLo, IndexHi; @@ -6854,13 +7395,13 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; + SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo, Scale }; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, - MMO); + MMO); - SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; + SDValue OpsHi[] = { Chain, Src0Hi, MaskHi, BasePtr, IndexHi, Scale }; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, - MMO); + MMO); AddToWorklist(Lo.getNode()); AddToWorklist(Hi.getNode()); @@ -6934,11 +7475,12 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, MLD->isExpandingLoad()); + unsigned HiOffset = LoMemVT.getStoreSize(); - MMO = DAG.getMachineFunction(). - getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), - SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo().getWithOffset(HiOffset), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment, + MLD->getAAInfo(), MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad()); @@ -7056,6 +7598,36 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { AddToWorklist(Add.getNode()); return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); } + + // If this select has a condition (setcc) with narrower operands than the + // select, try to widen the compare to match the select width. + // TODO: This should be extended to handle any constant. + // TODO: This could be extended to handle non-loading patterns, but that + // requires thorough testing to avoid regressions. + if (isNullConstantOrNullSplatConstant(RHS)) { + EVT NarrowVT = LHS.getValueType(); + EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger(); + EVT SetCCVT = getSetCCResultType(LHS.getValueType()); + unsigned SetCCWidth = SetCCVT.getScalarSizeInBits(); + unsigned WideWidth = WideVT.getScalarSizeInBits(); + bool IsSigned = isSignedIntSetCC(CC); + auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() && + SetCCWidth != 1 && SetCCWidth < WideWidth && + TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) && + TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) { + // Both compare operands can be widened for free. 
The LHS can use an + // extended load, and the RHS is a constant: + // vselect (ext (setcc load(X), C)), N1, N2 --> + // vselect (setcc extload(X), C'), N1, N2 + auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS); + SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS); + EVT WideSetCCVT = getSetCCResultType(WideVT); + SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC); + return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2); + } + } } if (SimplifySelectOps(N, N1, N2)) @@ -7127,22 +7699,33 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { } SDValue DAGCombiner::visitSETCC(SDNode *N) { - return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), - cast<CondCodeSDNode>(N->getOperand(2))->get(), - SDLoc(N)); -} + // setcc is very commonly used as an argument to brcond. This pattern + // also lends itself to numerous combines and, as a result, it is desirable + // to keep the argument to a brcond as a setcc for as long as possible. + bool PreferSetCC = + N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND; -SDValue DAGCombiner::visitSETCCE(SDNode *N) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue Carry = N->getOperand(2); - SDValue Cond = N->getOperand(3); + SDValue Combined = SimplifySetCC( + N->getValueType(0), N->getOperand(0), N->getOperand(1), + cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC); - // If Carry is false, fold to a regular SETCC. - if (Carry.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + if (!Combined) + return SDValue(); - return SDValue(); + // If we prefer to have a setcc, and we don't, we'll try our best to + // recreate one using rebuildSetCC. + if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { + SDValue NewSetCC = rebuildSetCC(Combined); + + // We don't have anything interesting to combine to. + if (NewSetCC.getNode() == N) + return SDValue(); + + if (NewSetCC) + return NewSetCC; + } + + return Combined; } SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { @@ -7222,12 +7805,12 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. Returns true if the extensions are possible and the above // mentioned transformation is profitable. -static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, +static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, unsigned ExtOpc, SmallVectorImpl<SDNode *> &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; - bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); + bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType()); for (SDNode::use_iterator UI = N0.getNode()->use_begin(), UE = N0.getNode()->use_end(); UI != UE; ++UI) { @@ -7283,16 +7866,16 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, - SDValue Trunc, SDValue ExtLoad, - const SDLoc &DL, ISD::NodeType ExtType) { + SDValue OrigLoad, SDValue ExtLoad, + ISD::NodeType ExtType) { // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { - SDNode *SetCC = SetCCs[i]; + SDLoc DL(ExtLoad); + for (SDNode *SetCC : SetCCs) { SmallVector<SDValue, 4> Ops; for (unsigned j = 0; j != 2; ++j) { SDValue SOp = SetCC->getOperand(j); - if (SOp == Trunc) + if (SOp == OrigLoad) Ops.push_back(ExtLoad); else Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); @@ -7341,7 +7924,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { return SDValue(); SmallVector<SDNode *, 4> SetCCs; - if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) + if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI)) return SDValue(); ISD::LoadExtType ExtType = @@ -7372,7 +7955,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { const unsigned Align = MinAlign(LN0->getAlignment(), Offset); SDValue SplitLoad = DAG.getExtLoad( - ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, + ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr, LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -7395,12 +7978,82 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { // with a truncate of the concatenated sextloaded vectors. SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode()); CombineTo(N0.getNode(), Trunc, NewChain); - ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, - (ISD::NodeType)N->getOpcode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } +// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> +// (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) +SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { + assert(N->getOpcode() == ISD::ZERO_EXTEND); + EVT VT = N->getValueType(0); + + // and/or/xor + SDValue N0 = N->getOperand(0); + if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) || + N0.getOperand(1).getOpcode() != ISD::Constant || + (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT))) + return SDValue(); + + // shl/shr + SDValue N1 = N0->getOperand(0); + if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) || + N1.getOperand(1).getOpcode() != ISD::Constant || + (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT))) + return SDValue(); + + // load + if (!isa<LoadSDNode>(N1.getOperand(0))) + return SDValue(); + LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0)); + EVT MemVT = Load->getMemoryVT(); + if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) || + Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed()) + return SDValue(); + + + // If the shift op is SHL, the logic op must be AND, otherwise the result + // will be wrong. + if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND) + return SDValue(); + + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + + SmallVector<SDNode*, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI)) + return SDValue(); + + // Actually do the transformation. 
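The rewrite performed next is sound because a zero-extending load commutes with the shift and the logic op: computing them in the wide type produces the same bits as computing them narrow and widening afterwards. For SHL that only holds when the logic op is an AND that discards the bits pushed past the narrow width, hence the check above. An exhaustive i8 -> i32 check for the SRL case (illustrative, not DAG code):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (unsigned v = 0; v < 256; ++v) {      // all i8 load values
      uint8_t loaded = static_cast<uint8_t>(v);
      // (zext i32 (and (srl (load i8), 2), 31)), evaluated in i8 first:
      uint32_t narrowFirst = static_cast<uint8_t>((loaded >> 2) & 0x1F);
      // (and (srl (zextload i32), 2), 31), evaluated wide:
      uint32_t wideFirst = (static_cast<uint32_t>(loaded) >> 2) & 0x1F;
      assert(narrowFirst == wideFirst);
    }
    return 0;
  }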
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT, + Load->getChain(), Load->getBasePtr(), + Load->getMemoryVT(), Load->getMemOperand()); + + SDLoc DL1(N1); + SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, + N1.getOperand(1)); + + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDLoc DL0(N0); + SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, + DAG.getConstant(Mask, DL0, VT)); + + ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); + CombineTo(N, And); + if (SDValue(Load, 0).hasOneUse()) { + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load), + Load->getValueType(0), ExtLoad); + CombineTo(Load, Trunc, ExtLoad.getValue(1)); + } + return SDValue(N,0); // Return N so it doesn't get rechecked! +} + /// If we're narrowing or widening the result of a vector select and the final /// size is the same size as a setcc (compare) feeding the select, then try to /// apply the cast operation to the select's operands because matching vector @@ -7446,6 +8099,106 @@ SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) { return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB); } +// fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) +// fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x))) +static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, + const TargetLowering &TLI, EVT VT, + bool LegalOperations, SDNode *N, + SDValue N0, ISD::LoadExtType ExtLoadType) { + SDNode *N0Node = N0.getNode(); + bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node) + : ISD::isZEXTLoad(N0Node); + if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) || + !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse()) + return {}; + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((LegalOperations || LN0->isVolatile()) && + !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) + return {}; + + SDValue ExtLoad = + DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), + LN0->getBasePtr(), MemVT, LN0->getMemOperand()); + Combiner.CombineTo(N, ExtLoad); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x))) +// Only generate vector extloads when 1) they're legal, and 2) they are +// deemed desirable by the target. 
+static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, + const TargetLowering &TLI, EVT VT, + bool LegalOperations, SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!ISD::isNON_EXTLoad(N0.getNode()) || + !ISD::isUNINDEXEDLoad(N0.getNode()) || + ((LegalOperations || VT.isVector() || + cast<LoadSDNode>(N0)->isVolatile()) && + !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) + return {}; + + bool DoXform = true; + SmallVector<SDNode *, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); + if (!DoXform) + return {}; + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); + Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc); + // If the load value is used only by N, replace it via CombineTo N. + bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + Combiner.CombineTo(N, ExtLoad); + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); + Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, + bool LegalOperations) { + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext"); + + SDValue SetCC = N->getOperand(0); + if (LegalOperations || SetCC.getOpcode() != ISD::SETCC || + !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1) + return SDValue(); + + SDValue X = SetCC.getOperand(0); + SDValue Ones = SetCC.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get(); + EVT VT = N->getValueType(0); + EVT XVT = X.getValueType(); + // setge X, C is canonicalized to setgt, so we do not need to match that + // pattern. The setlt sibling is folded in SimplifySelectCC() because it does + // not require the 'not' op. + if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) { + // Invert and smear/shift the sign bit: + // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1) + // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1) + SDLoc DL(N); + SDValue NotX = DAG.getNOT(DL, X, VT); + SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT); + auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL; + return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount); + } + return SDValue(); +} + SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7510,62 +8263,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } } - // fold (sext (load x)) -> (sext (truncate (sextload x))) - // Only generate vector extloads when 1) they're legal, and 2) they are - // deemed desirable by the target. 
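foldExtendedSignBitTest above encodes a small bit trick: x > -1 exactly when the sign bit of x is clear, i.e. the sign bit of ~x is set, so shifting that inverted sign bit down reproduces the extended boolean. A standalone check (arithmetic right shift on int32_t assumed, as in mainstream compilers and C++20):

  #include <cassert>
  #include <cstdint>

  // sext i1 (setgt i32 X, -1) --> sra (not X), 31
  static int32_t sextIsNonNegative(int32_t x) { return ~x >> 31; }
  // zext i1 (setgt i32 X, -1) --> srl (not X), 31
  static uint32_t zextIsNonNegative(int32_t x) {
    return static_cast<uint32_t>(~x) >> 31;
  }

  int main() {
    for (int32_t x : {-2147483647 - 1, -5, -1, 0, 7, 2147483647}) {
      assert(sextIsNonNegative(x) == (x > -1 ? -1 : 0));
      assert(zextIsNonNegative(x) == (x > -1 ? 1u : 0u));
    }
    return 0;
  }

(The removed block that follows is the old inline copy of the sext-load fold, now handled by tryToFoldExtOfLoad.)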
- if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !VT.isVector() && - !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { - bool DoXform = true; - SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); - if (VT.isVector()) - DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); - if (DoXform) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), - LN0->getBasePtr(), N0.getValueType(), - LN0->getMemOperand()); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); - // If the load value is used only by N, replace it via CombineTo N. - bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); - CombineTo(N, ExtLoad); - if (NoReplaceTrunc) - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else - CombineTo(LN0, Trunc, ExtLoad.getValue(1)); - return SDValue(N, 0); - } - } + // Try to simplify (sext (load x)). + if (SDValue foldedExt = + tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, + ISD::SEXTLOAD, ISD::SIGN_EXTEND)) + return foldedExt; // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) return ExtLoad; - // fold (sext (sextload x)) -> (sext (truncate (sextload x))) - // fold (sext ( extload x)) -> (sext (truncate (sextload x))) - if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && - ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, DL, VT, LN0->getChain(), - LN0->getBasePtr(), MemVT, - LN0->getMemOperand()); - CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } + // Try to simplify (sext (sextload x)). 
+ if (SDValue foldedExt = tryToFoldExtOfExtload( + DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD)) + return foldedExt; // fold (sext (and/or/xor (load x), cst)) -> // (and/or/xor (sextload x), (sext cst)) @@ -7573,30 +8285,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { - bool DoXform = true; + LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0)); + EVT MemVT = LN00->getMemoryVT(); + if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) && + LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) { SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, - SetCCs, TLI); + bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), + ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, - LN0->getChain(), LN0->getBasePtr(), - LN0->getMemoryVT(), - LN0->getMemOperand()); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT, + LN00->getChain(), LN00->getBasePtr(), + LN00->getMemoryVT(), + LN00->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - SDLoc(N0.getOperand(0)), - N0.getOperand(0).getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::SIGN_EXTEND); + ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND); bool NoReplaceTruncAnd = !N0.hasOneUse(); - bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); CombineTo(N, And); // If N0 has multiple uses, change other uses as well. if (NoReplaceTruncAnd) { @@ -7604,15 +8312,21 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); CombineTo(N0.getNode(), TruncAnd); } - if (NoReplaceTrunc) - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else - CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), + LN00->getValueType(0), ExtLoad); + CombineTo(LN00, Trunc, ExtLoad.getValue(1)); + } return SDValue(N,0); // Return N so it doesn't get rechecked! } } } + if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) + return V; + if (N0.getOpcode() == ISD::SETCC) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); @@ -7659,8 +8373,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // If the type of the setcc is larger (say, i8) then the value of the high // bit depends on getBooleanContents(), so ask TLI for a real "true" value // of the appropriate width. - SDValue ExtTrueVal = (SetCCWidth == 1) ? DAG.getAllOnesConstant(DL, VT) - : TLI.getConstTrueVal(DAG, VT, DL); + SDValue ExtTrueVal = (SetCCWidth == 1) + ? 
DAG.getAllOnesConstant(DL, VT) + : DAG.getBoolConstant(true, DL, VT, N00VT); SDValue Zero = DAG.getConstant(0, DL, VT); if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) @@ -7777,13 +8492,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // Try to mask before the extension to avoid having to generate a larger mask, // possibly over several sub-vectors. - if (SrcVT.bitsLT(VT)) { + if (SrcVT.bitsLT(VT) && VT.isVector()) { if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { SDValue Op = N0.getOperand(0); Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); AddToWorklist(Op.getNode()); - return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + // Transfer the debug info; the new node is equivalent to N0. + DAG.transferDbgValues(N0, ZExtOrTrunc); + return ZExtOrTrunc; } } @@ -7815,39 +8533,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { X, DAG.getConstant(Mask, DL, VT)); } - // fold (zext (load x)) -> (zext (truncate (zextload x))) - // Only generate vector extloads when 1) they're legal, and 2) they are - // deemed desirable by the target. - if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - ((!LegalOperations && !VT.isVector() && - !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { - bool DoXform = true; - SmallVector<SDNode*, 4> SetCCs; - if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); - if (VT.isVector()) - DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); - if (DoXform) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, - LN0->getChain(), - LN0->getBasePtr(), N0.getValueType(), - LN0->getMemOperand()); - - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); - // If the load value is used only by N, replace it via CombineTo N. - bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); - CombineTo(N, ExtLoad); - if (NoReplaceTrunc) - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else - CombineTo(LN0, Trunc, ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } + // Try to simplify (zext (load x)). + if (SDValue foldedExt = + tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0, + ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) + return foldedExt; // fold (zext (load x)) to multiple smaller zextloads. // Only on illegal but splittable vectors. 
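Both the sext and the zext logic-op folds in this region rest on the same fact: extension, whether by sign-bit replication or by zero fill, commutes with and/or/xor, so the constant operand can simply be extended alongside the load. An exhaustive i16 -> i32 spot check (plain C++, illustrative only):

  #include <cassert>
  #include <cstdint>

  int main() {
    const int16_t C = -256;  // 0xFF00: a constant with the sign bit set
    for (int v = -32768; v <= 32767; ++v) {
      int16_t x = static_cast<int16_t>(v);
      // sext(op(x, C)) == op(sext(x), sext(C)) for op in {and, xor} (or is analogous):
      assert(int32_t(int16_t(x & C)) == (int32_t(x) & int32_t(C)));
      assert(int32_t(int16_t(x ^ C)) == (int32_t(x) ^ int32_t(C)));
      // zext(x & C) == zext(x) & zext(C):
      uint16_t ux = static_cast<uint16_t>(x), uc = static_cast<uint16_t>(C);
      assert(uint32_t(uint16_t(ux & uc)) == (uint32_t(ux) & uint32_t(uc)));
    }
    return 0;
  }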
@@ -7862,10 +8552,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getOpcode() == ISD::XOR) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); - if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { + LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0)); + EVT MemVT = LN00->getMemoryVT(); + if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) && + LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) { @@ -7873,29 +8564,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); EVT LoadResultTy = AndC->getValueType(0); EVT ExtVT; - if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT)) + if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT)) DoXform = false; } - if (DoXform) - DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), - ISD::ZERO_EXTEND, SetCCs, TLI); } + if (DoXform) + DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, - LN0->getChain(), LN0->getBasePtr(), - LN0->getMemoryVT(), - LN0->getMemOperand()); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT, + LN00->getChain(), LN00->getBasePtr(), + LN00->getMemoryVT(), + LN00->getMemOperand()); APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); SDLoc DL(N); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - SDLoc(N0.getOperand(0)), - N0.getOperand(0).getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, ISD::ZERO_EXTEND); + ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND); bool NoReplaceTruncAnd = !N0.hasOneUse(); - bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse(); + bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse(); CombineTo(N, And); // If N0 has multiple uses, change other uses as well. if (NoReplaceTruncAnd) { @@ -7903,35 +8591,30 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And); CombineTo(N0.getNode(), TruncAnd); } - if (NoReplaceTrunc) - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else - CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + if (NoReplaceTrunc) { + DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1)); + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00), + LN00->getValueType(0), ExtLoad); + CombineTo(LN00, Trunc, ExtLoad.getValue(1)); + } return SDValue(N,0); // Return N so it doesn't get rechecked! 
} } } - // fold (zext (zextload x)) -> (zext (truncate (zextload x))) - // fold (zext ( extload x)) -> (zext (truncate (zextload x))) - if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && - ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { - LoadSDNode *LN0 = cast<LoadSDNode>(N0); - EVT MemVT = LN0->getMemoryVT(); - if ((!LegalOperations && !LN0->isVolatile()) || - TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, - LN0->getChain(), - LN0->getBasePtr(), MemVT, - LN0->getMemOperand()); - CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), - ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! - } - } + // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) -> + // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst)) + if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N)) + return ZExtLoad; + + // Try to simplify (zext (zextload x)). + if (SDValue foldedExt = tryToFoldExtOfExtload( + DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD)) + return foldedExt; + + if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations)) + return V; if (N0.getOpcode() == ISD::SETCC) { // Only do this before legalize for now. @@ -8069,24 +8752,25 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { bool DoXform = true; SmallVector<SDNode*, 4> SetCCs; if (!N0.hasOneUse()) - DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs, + TLI); if (DoXform) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), - ISD::ANY_EXTEND); + ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND); // If the load value is used only by N, replace it via CombineTo N. bool NoReplaceTrunc = N0.hasOneUse(); CombineTo(N, ExtLoad); - if (NoReplaceTrunc) + if (NoReplaceTrunc) { DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); - else + } else { + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad); CombineTo(LN0, Trunc, ExtLoad.getValue(1)); + } return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} } @@ -8094,9 +8778,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (zextload x)) -> (aext (truncate (zextload x))) // fold (aext (sextload x)) -> (aext (truncate (sextload x))) // fold (aext ( extload x)) -> (aext (truncate (extload x))) - if (N0.getOpcode() == ISD::LOAD && - !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - N0.hasOneUse()) { + if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); ISD::LoadExtType ExtType = LN0->getExtensionType(); EVT MemVT = LN0->getMemoryVT(); @@ -8105,10 +8788,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { VT, LN0->getChain(), LN0->getBasePtr(), MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, SDLoc(N0), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); + DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -8248,8 +8928,9 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { - if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - ShAmt = N01->getZExtValue(); + SDValue SRL = N0; + if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) { + ShAmt = ConstShift->getZExtValue(); unsigned EVTBits = ExtVT.getSizeInBits(); // Is the shift amount a multiple of size of VT? if ((ShAmt & (EVTBits-1)) == 0) { @@ -8262,17 +8943,36 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // At this point, we must have a load or else we can't do the transform. if (!isa<LoadSDNode>(N0)) return SDValue(); + auto *LN0 = cast<LoadSDNode>(N0); + // Because a SRL must be assumed to *need* to zero-extend the high bits // (as opposed to anyext the high bits), we can't combine the zextload // lowering of SRL and an sextload. - if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD) + if (LN0->getExtensionType() == ISD::SEXTLOAD) return SDValue(); // If the shift amount is larger than the input type then we're not // accessing any of the loaded bytes. If the load was a zextload/extload // then the result of the shift+trunc is zero/undef (handled elsewhere). - if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) + if (ShAmt >= LN0->getMemoryVT().getSizeInBits()) return SDValue(); + + // If the SRL is only used by a masking AND, we may be able to adjust + // the ExtVT to make the AND redundant. + SDNode *Mask = *(SRL->use_begin()); + if (Mask->getOpcode() == ISD::AND && + isa<ConstantSDNode>(Mask->getOperand(1))) { + const APInt &ShiftMask = + cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue(); + if (ShiftMask.isMask()) { + EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), + ShiftMask.countTrailingOnes()); + // If the mask is smaller, recompute the type. 
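The new mask inspection amounts to this: if the SRL only feeds an AND with a low-bit mask, the load needs just countTrailingOnes(mask) bits, so ExtVT can shrink accordingly. In plain C++ (isLowBitMask is an illustrative analogue of APInt::isMask; __builtin_popcount is a GCC/Clang builtin and, for such masks, equals the trailing-ones count):

  #include <cassert>
  #include <cstdint>

  static bool isLowBitMask(uint32_t m) {          // a run of ones anchored at bit 0
    return m != 0 && (m & (m + 1)) == 0;
  }

  int main() {
    assert(isLowBitMask(0xFFu) && __builtin_popcount(0xFFu) == 8);
    assert(isLowBitMask(0x0FFFu) && __builtin_popcount(0x0FFFu) == 12);
    assert(!isLowBitMask(0xF0u));                 // ones not anchored at bit 0
    // So for ((x >> 8) & 0xFF), an 8-bit load of the byte at offset 1 suffices,
    // subject to the legality checks above:
    uint32_t x = 0xAABBCCDDu;
    assert(((x >> 8) & 0xFFu) == static_cast<uint8_t>(x >> 8));
    return 0;
  }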
+ if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) && + TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT)) + ExtVT = MaskedVT; + } + } } } @@ -8292,7 +8992,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!isLegalNarrowLoad(LN0, ExtType, ExtVT, ShAmt)) + if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); // For big endian targets, we need to adjust the offset to the pointer to @@ -8388,7 +9088,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } - // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_in_reg x) + // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x) if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && @@ -8762,6 +9462,22 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2)); } + // fold (truncate (extract_subvector(ext x))) -> + // (extract_subvector x) + // TODO: This can be generalized to cover cases where the truncate and extract + // do not fully cancel each other out. + if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SIGN_EXTEND || + N00.getOpcode() == ISD::ZERO_EXTEND || + N00.getOpcode() == ISD::ANY_EXTEND) { + if (N00.getOperand(0)->getValueType(0).getVectorElementType() == + VT.getVectorElementType()) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT, + N00.getOperand(0), N0.getOperand(1)); + } + } + if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; @@ -8882,17 +9598,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { } // If the input is a constant, let getNode fold it. - if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { - // If we can't allow illegal operations, we need to check that this is just - // a fp -> int or int -> conversion and that the resulting operation will - // be legal. - if (!LegalOperations || - (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && - TLI.isOperationLegal(ISD::ConstantFP, VT)) || - (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && - TLI.isOperationLegal(ISD::Constant, VT))) - return DAG.getBitcast(VT, N0); - } + // We always need to check that this is just an fp -> int or int -> fp conversion; + // otherwise we will get back N, which will confuse the caller into thinking + // we used CombineTo. This can block target combines from running. If we aren't + // allowed to create illegal operations, we need to ensure the resulting operation + // will be legal. + // TODO: Maybe we should check that the return value isn't N explicitly?
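For reference, the int <-> fp constant folds guarded here are the compile-time analogue of std::bit_cast (C++20):

  #include <bit>
  #include <cassert>
  #include <cstdint>

  int main() {
    // IEEE-754 single precision: 1.0f <-> 0x3F800000.
    assert(std::bit_cast<float>(UINT32_C(0x3F800000)) == 1.0f);
    assert(std::bit_cast<uint32_t>(1.0f) == UINT32_C(0x3F800000));
    return 0;
  }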
+ if ((isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() && + (!LegalOperations || TLI.isOperationLegal(ISD::ConstantFP, VT))) || + (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() && + (!LegalOperations || TLI.isOperationLegal(ISD::Constant, VT)))) + return DAG.getBitcast(VT, N0); // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) @@ -9238,7 +9954,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { static bool isContractable(SDNode *N) { SDNodeFlags F = N->getFlags(); - return F.hasAllowContract() || F.hasUnsafeAlgebra(); + return F.hasAllowContract() || F.hasAllowReassociation(); } /// Try to perform FMA combining on a given FADD node. @@ -9262,8 +9978,10 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + SDNodeFlags Flags = N->getFlags(); + bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || HasFMAD); + CanFuse || HasFMAD); // If the addition is not contractable, do not combine. if (!AllowFusionGlobally && !isContractable(N)) return SDValue(); @@ -9293,14 +10011,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), N1); + N0.getOperand(0), N0.getOperand(1), N1, Flags); } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, - N1.getOperand(0), N1.getOperand(1), N0); + N1.getOperand(0), N1.getOperand(1), N0, Flags); } // Look through FP_EXTEND nodes to do more combining. @@ -9314,7 +10032,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N00.getOperand(1)), N1); + N00.getOperand(1)), N1, Flags); } } @@ -9328,16 +10046,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, - N10.getOperand(1)), N0); + N10.getOperand(1)), N0, Flags); } } // More folding opportunities when target permits. if (Aggressive) { // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF - // are currently only supported on binary nodes. - if (Options.UnsafeFPMath && + if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { @@ -9346,13 +10062,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), - N1)); + N1, Flags), Flags); } // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF - // are currently only supported on binary nodes. 
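The CanFuse gating introduced in the next hunk matters because fusing fadd (fmul x, y), z into one FMA drops the intermediate rounding of the product, so it is only legal when contraction-style fast-math flags (or global unsafe math) permit it. A standalone sketch of the rounding difference (illustrative, not LLVM code):

#include <cassert>
#include <cmath>

int main() {
  double A = 1.0 + 0x1p-27;
  double B = 1.0 - 0x1p-27;
  double C = -1.0;
  volatile double Product = A * B;  // exact value 1 - 2^-54 rounds to 1.0
  double Separate = Product + C;    // two roundings: 0.0
  double Fused = std::fma(A, B, C); // one rounding: exactly -2^-54
  assert(Separate == 0.0);
  assert(Fused == -0x1p-54);
  return 0;
}

Propagating the node's own SDNodeFlags into the created FMA, as this patch does, keeps that permission attached to the fused node instead of relying solely on the global UnsafeFPMath switch.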
- if (Options.UnsafeFPMath && + if (CanFuse && N1->getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL && N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { @@ -9361,19 +10075,20 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(PreferredFusedOpcode, SL, VT, N1.getOperand(2).getOperand(0), N1.getOperand(2).getOperand(1), - N0)); + N0, Flags), Flags); } // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, + SDNodeFlags Flags) { return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); + Z, Flags), Flags); }; if (N0.getOpcode() == PreferredFusedOpcode) { SDValue N02 = N0.getOperand(2); @@ -9383,7 +10098,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) { return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), N020.getOperand(0), N020.getOperand(1), - N1); + N1, Flags); } } } @@ -9394,14 +10109,15 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. auto FoldFAddFPExtFMAFMul = [&] ( - SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z, + SDNodeFlags Flags) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, X), DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FP_EXTEND, SL, VT, U), DAG.getNode(ISD::FP_EXTEND, SL, VT, V), - Z)); + Z, Flags), Flags); }; if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); @@ -9411,7 +10127,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) { return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), N002.getOperand(0), N002.getOperand(1), - N1); + N1, Flags); } } } @@ -9426,7 +10142,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) { return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), N120.getOperand(0), N120.getOperand(1), - N0); + N0, Flags); } } } @@ -9444,7 +10160,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) { return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), N102.getOperand(0), N102.getOperand(1), - N0); + N0, Flags); } } } @@ -9473,8 +10189,11 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!HasFMAD && !HasFMA) return SDValue(); + const SDNodeFlags Flags = N->getFlags(); + bool CanFuse = Options.UnsafeFPMath || isContractable(N); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || - Options.UnsafeFPMath || HasFMAD); + CanFuse || HasFMAD); + // If the subtraction is not contractable, do not combine. 
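The FSUB variants below follow the same scheme with an extra negation, e.g. (fsub (fmul x, y), z) becomes (fma x, y, (fneg z)) and (fsub x, (fmul y, z)) becomes (fma (fneg y), z, x). A quick standalone check of the two identities on exactly representable values (illustrative only):

#include <cassert>
#include <cmath>

int main() {
  double X = 3.0, Y = 5.0, Z = 7.0;
  assert(X * Y - Z == std::fma(X, Y, -Z)); // (x*y) - z == fma(x, y, -z)
  assert(X - Y * Z == std::fma(-Y, Z, X)); // x - (y*z) == fma(-y, z, x)
  return 0;
}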
if (!AllowFusionGlobally && !isContractable(N)) return SDValue(); @@ -9499,16 +10218,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, SL, VT, N1)); + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) + if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), - N1.getOperand(1), N0); + N1.getOperand(1), N0, Flags); + } // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) && @@ -9517,7 +10237,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { SDValue N01 = N0.getOperand(0).getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N00), N01, - DAG.getNode(ISD::FNEG, SL, VT, N1)); + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); } // Look through FP_EXTEND nodes to do more combining. @@ -9533,7 +10253,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N00.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N00.getOperand(1)), - DAG.getNode(ISD::FNEG, SL, VT, N1)); + DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); } } @@ -9550,7 +10270,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N10.getOperand(0))), DAG.getNode(ISD::FP_EXTEND, SL, VT, N10.getOperand(1)), - N0); + N0, Flags); } } @@ -9572,7 +10292,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N000.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)), - N1)); + N1, Flags)); } } } @@ -9595,7 +10315,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N000.getOperand(0)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N000.getOperand(1)), - N1)); + N1, Flags)); } } } @@ -9604,9 +10324,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (Aggressive) { // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF - // are currently only supported on binary nodes. - if (Options.UnsafeFPMath && N0.getOpcode() == PreferredFusedOpcode && + if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -9615,14 +10333,12 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N0.getOperand(2).getOperand(0), N0.getOperand(2).getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, - N1))); + N1), Flags), Flags); } // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - // FIXME: The UnsafeAlgebra flag should be propagated to FMA/FMAD, but FMF - // are currently only supported on binary nodes. 
- if (Options.UnsafeFPMath && N1.getOpcode() == PreferredFusedOpcode && + if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && isContractableFMUL(N1.getOperand(2))) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); @@ -9632,8 +10348,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { N1.getOperand(1), DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, N20), - - N21, N0)); + N21, N0, Flags), Flags); } @@ -9653,7 +10368,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N020.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, - N1))); + N1), Flags), Flags); } } } @@ -9681,7 +10396,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, N002.getOperand(1)), DAG.getNode(ISD::FNEG, SL, VT, - N1))); + N1), Flags), Flags); } } } @@ -9704,7 +10419,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { VT, N1200)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N1201), - N0)); + N0, Flags), Flags); } } @@ -9735,7 +10450,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { VT, N1020)), DAG.getNode(ISD::FP_EXTEND, SL, VT, N1021), - N0)); + N0, Flags), Flags); } } } @@ -9751,6 +10466,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); SDLoc SL(N); + const SDNodeFlags Flags = N->getFlags(); assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); @@ -9782,52 +10498,54 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) - auto FuseFADD = [&](SDValue X, SDValue Y) { + auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); if (XC1 && XC1->isExactlyValue(+1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + Y, Flags); if (XC1 && XC1->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y)); + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); } return SDValue(); }; - if (SDValue FMA = FuseFADD(N0, N1)) + if (SDValue FMA = FuseFADD(N0, N1, Flags)) return FMA; - if (SDValue FMA = FuseFADD(N1, N0)) + if (SDValue FMA = FuseFADD(N1, N0, Flags)) return FMA; // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) - auto FuseFSUB = [&](SDValue X, SDValue Y) { + auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) { if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); if (XC0 && XC0->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - Y); + Y, Flags); if (XC0 && XC0->isExactlyValue(-1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, - DAG.getNode(ISD::FNEG, SL, VT, Y)); + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); if (XC1 && XC1->isExactlyValue(+1.0)) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, - 
DAG.getNode(ISD::FNEG, SL, VT, Y)); + DAG.getNode(ISD::FNEG, SL, VT, Y), Flags); if (XC1 && XC1->isExactlyValue(-1.0)) - return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + Y, Flags); } return SDValue(); }; - if (SDValue FMA = FuseFSUB(N0, N1)) + if (SDValue FMA = FuseFSUB(N0, N1, Flags)) return FMA; - if (SDValue FMA = FuseFSUB(N1, N0)) + if (SDValue FMA = FuseFSUB(N1, N0, Flags)) return FMA; return SDValue(); @@ -9889,35 +10607,42 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags); } - // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { - // fold (fadd A, 0) -> A - if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) - if (N1C->isZero()) - return N0; + ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1); + if (N1C && N1C->isZero()) { + if (N1C->isNegative() || Options.UnsafeFPMath || + Flags.hasNoSignedZeros()) { + // fold (fadd A, 0) -> A + return N0; + } } - // If 'unsafe math' is enabled, fold lots of things. - if (Options.UnsafeFPMath) { - // No FP constant should be created after legalization as Instruction - // Selection pass has a hard time dealing with FP constants. - bool AllowNewConst = (Level < AfterLegalizeDAG); - - // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) - if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && - isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, - Flags), - Flags); + // No FP constant should be created after legalization as Instruction + // Selection pass has a hard time dealing with FP constants. + bool AllowNewConst = (Level < AfterLegalizeDAG); + // If 'unsafe math' or nnan is enabled, fold lots of things. + if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) { // If allowed, fold (fadd (fneg x), x) -> 0.0 - if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, DL, VT); // If allowed, fold (fadd x, (fneg x)) -> 0.0 - if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) + if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, DL, VT); + } + + // If 'unsafe math' or reassoc and nsz, fold lots of things. + // TODO: break out portions of the transformations below for which Unsafe is + // considered and which do not require both nsz and reassoc + if ((Options.UnsafeFPMath || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + AllowNewConst) { + // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 + if (N1CFP && N0.getOpcode() == ISD::FADD && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags); + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags); + } // We can fold chains of FADD's of the same value into multiplications. 
// This transform is not safe in general because we are reducing the number @@ -9965,7 +10690,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } - if (N0.getOpcode() == ISD::FADD && AllowNewConst) { + if (N0.getOpcode() == ISD::FADD) { bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && @@ -9975,7 +10700,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } - if (N1.getOpcode() == ISD::FADD && AllowNewConst) { + if (N1.getOpcode() == ISD::FADD) { bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && @@ -9986,8 +10711,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) - if (AllowNewConst && - N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { @@ -10027,15 +10751,23 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations), Flags); + // (fsub A, 0) -> A + if (N1CFP && N1CFP->isZero()) { + if (!N1CFP->isNegative() || Options.UnsafeFPMath || + Flags.hasNoSignedZeros()) { + return N0; + } + } + + if (N0 == N1) { + // (fsub x, x) -> 0.0 + if (Options.UnsafeFPMath || Flags.hasNoNaNs()) + return DAG.getConstantFP(0.0f, DL, VT); + } - // FIXME: Auto-upgrade the target/function-level option. - if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) { - // (fsub 0, B) -> -B - if (N0CFP && N0CFP->isZero()) { + // (fsub 0, B) -> -B + if (N0CFP && N0CFP->isZero()) { + if (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) return GetNegatedExpression(N1, DAG, LegalOperations); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) @@ -10043,16 +10775,13 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + return DAG.getNode(ISD::FADD, DL, VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations), Flags); + // If 'unsafe math' is enabled, fold lots of things. if (Options.UnsafeFPMath) { - // (fsub A, 0) -> A - if (N1CFP && N1CFP->isZero()) - return N0; - - // (fsub x, x) -> 0.0 - if (N0 == N1) - return DAG.getConstantFP(0.0f, DL, VT); - // (fsub x, (fadd x, y)) -> (fneg y) // (fsub x, (fadd y, x)) -> (fneg y) if (N1.getOpcode() == ISD::FADD) { @@ -10109,12 +10838,15 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || + (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) { // fold (fmul A, 0) -> 0 if (N1CFP && N1CFP->isZero()) return N1; + } - // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) { + // fmul (fmul X, C1), C2 -> fmul X, C1 * C2 if (N0.getOpcode() == ISD::FMUL) { // Fold scalars or any vector constants (not just splats). 
// This fold is done in general by InstCombine, but extra fmul insts @@ -10138,13 +10870,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { } } - // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) - // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs - // during an early run of DAGCombiner can prevent folding with fmuls - // inserted during lowering. - if (N0.getOpcode() == ISD::FADD && - (N0.getOperand(0) == N0.getOperand(1)) && - N0.hasOneUse()) { + // Match a special-case: we convert X * 2.0 into fadd. + // fmul (fadd X, X), C -> fmul X, 2.0 * C + if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() && + N0.getOperand(0) == N0.getOperand(1)) { const SDValue Two = DAG.getConstantFP(2.0, DL, VT); SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); @@ -10238,6 +10967,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; + // FMA nodes have flags that propagate to the created nodes. + const SDNodeFlags Flags = N->getFlags(); + bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N); + // Constant fold FMA. if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) && @@ -10245,7 +10978,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } - if (Options.UnsafeFPMath) { + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; if (N1CFP && N1CFP->isZero()) @@ -10262,12 +10995,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); - // TODO: FMA nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with all unsafe math transforms. - SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); - - if (Options.UnsafeFPMath) { + if (UnsafeFPMath) { // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && isConstantFPBuildVectorOrConstantFP(N1) && @@ -10313,7 +11041,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } - if (Options.UnsafeFPMath) { + if (UnsafeFPMath) { // (fma x, c, x) -> (fmul x, (c+1)) if (N1CFP && N0 == N2) { return DAG.getNode(ISD::FMUL, DL, VT, N0, @@ -10420,7 +11148,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. @@ -10529,17 +11257,16 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { } SDValue DAGCombiner::visitFSQRT(SDNode *N) { - if (!DAG.getTarget().Options.UnsafeFPMath) + SDNodeFlags Flags = N->getFlags(); + if (!DAG.getTarget().Options.UnsafeFPMath && + !Flags.hasApproximateFuncs()) return SDValue(); SDValue N0 = N->getOperand(0); if (TLI.isFsqrtCheap(N0, DAG)) return SDValue(); - // TODO: FSQRT nodes should have flags that propagate to the created nodes. - // For now, create a Flags object for use with all unsafe math transforms. - SDNodeFlags Flags; - Flags.setUnsafeAlgebra(true); + // FSQRT nodes have flags that propagate to the created nodes. 
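As a standalone illustration of what buildSqrtEstimate (invoked just below) does: it starts from a coarse hardware reciprocal-square-root estimate and sharpens it with Newton-Raphson steps, which is why the whole path stays behind UnsafeFPMath or the new approximate-functions flag. The helper name and the seed value here are illustrative stand-ins, not LLVM's:

#include <cassert>
#include <cmath>

// One Newton-Raphson refinement step for E ~= 1/sqrt(X):
//   E' = E * (1.5 - 0.5 * X * E * E)
static double refineRsqrt(double X, double Est) {
  return Est * (1.5 - 0.5 * X * Est * Est);
}

int main() {
  double X = 2.0;
  double Est = 0.7; // stand-in for a coarse hardware rsqrt estimate
  for (int I = 0; I < 4; ++I)
    Est = refineRsqrt(X, Est); // quadratic convergence per step
  double Sqrt = X * Est;       // sqrt(X) = X * rsqrt(X)
  assert(std::fabs(Sqrt - std::sqrt(2.0)) < 1e-9);
  return 0;
}

Each iteration roughly doubles the number of correct digits, so a target reports how many steps its estimate needs rather than computing a full-precision square root.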
return buildSqrtEstimate(N0, Flags); } @@ -10607,6 +11334,41 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { return SDValue(); } +static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + // This optimization is guarded by a function attribute because it may produce + // unexpected results. I.e., programs may be relying on the platform-specific + // undefined behavior when the float-to-int conversion overflows. + const Function &F = DAG.getMachineFunction().getFunction(); + Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow"); + if (StrictOverflow.getValueAsString().equals("false")) + return SDValue(); + + // We only do this if the target has legal ftrunc. Otherwise, we'd likely be + // replacing casts with a libcall. We also must be allowed to ignore -0.0 + // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer + // conversions would return +0.0. + // FIXME: We should be able to use node-level FMF here. + // TODO: If strict math, should we use FABS (+ range check for signed cast)? + EVT VT = N->getValueType(0); + if (!TLI.isOperationLegal(ISD::FTRUNC, VT) || + !DAG.getTarget().Options.NoSignedZerosFPMath) + return SDValue(); + + // fptosi/fptoui round towards zero, so converting from FP to integer and + // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X + SDValue N0 = N->getOperand(0); + if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && + N0.getOperand(0).getValueType() == VT) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); + + if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && + N0.getOperand(0).getValueType() == VT) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0)); + + return SDValue(); +} + SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -10658,6 +11420,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { } } + if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) + return FTrunc; + return SDValue(); } @@ -10697,6 +11462,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { } } + if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) + return FTrunc; + return SDValue(); } @@ -11103,16 +11871,22 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } - if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || - ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && - (N1.getOperand(0).hasOneUse() && - N1.getOperand(0).getOpcode() == ISD::SRL))) { - SDNode *Trunc = nullptr; - if (N1.getOpcode() == ISD::TRUNCATE) { - // Look pass the truncate. - Trunc = N1.getNode(); - N1 = N1.getOperand(0); - } + if (N1.hasOneUse()) { + if (SDValue NewN1 = rebuildSetCC(N1)) + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2); + } + + return SDValue(); +} + +SDValue DAGCombiner::rebuildSetCC(SDValue N) { + if (N.getOpcode() == ISD::SRL || + (N.getOpcode() == ISD::TRUNCATE && + (N.getOperand(0).hasOneUse() && + N.getOperand(0).getOpcode() == ISD::SRL))) { + // Look past the truncate. + if (N.getOpcode() == ISD::TRUNCATE) + N = N.getOperand(0); // Match this pattern so that we can generate simpler code: // // %a = ... // %b = and i32 %a, 2 // %c = srl i32 %b, 1 // brcond i32 %c ... // // into // // %a = ... // %b = and i32 %a, 2 // %c = setcc eq %b, 0 // brcond %c ... // // This applies only when the AND constant value has one bit set and the // SRL constant is equal to the log2 of the AND constant. The back-end is // smart enough to convert the result into a TEST/JMP sequence.
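The rewrite described above can be checked in isolation: for a power-of-two mask C, the value (x & C) >> log2(C) is nonzero exactly when (x & C) is nonzero, which is why the branch condition can be rebuilt as a SETNE against zero. A standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C = 0x40; // one bit set; log2(C) == 6
  for (uint32_t X = 0; X < 256; ++X) {
    bool ShiftForm = ((X & C) >> 6) != 0; // (srl (and x, C), 6)
    bool TestForm = (X & C) != 0;         // setcc ne (and x, C), 0
    assert(ShiftForm == TestForm);
  }
  return 0;
}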
- SDValue Op0 = N1.getOperand(0); - SDValue Op1 = N1.getOperand(1); + SDValue Op0 = N.getOperand(0); + SDValue Op1 = N.getOperand(1); - if (Op0.getOpcode() == ISD::AND && - Op1.getOpcode() == ISD::Constant) { + if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) { SDValue AndOp1 = Op0.getOperand(1); if (AndOp1.getOpcode() == ISD::Constant) { const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); if (AndConst.isPowerOf2() && - cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) { SDLoc DL(N); - SDValue SetCC = - DAG.getSetCC(DL, - getSetCCResultType(Op0.getValueType()), - Op0, DAG.getConstant(0, DL, Op0.getValueType()), - ISD::SETNE); - - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, - MVT::Other, Chain, SetCC, N2); - // Don't add the new BRCond into the worklist or else SimplifySelectCC - // will convert it back to (X & C1) >> C2. - CombineTo(N, NewBRCond, false); - // Truncate is dead. - if (Trunc) - deleteAndRecombine(Trunc); - // Replace the uses of SRL with SETCC - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - deleteAndRecombine(N1.getNode()); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), + ISD::SETNE); } } } - - if (Trunc) - // Restore N1 if the above transformation doesn't match. - N1 = N->getOperand(1); } // Transform br(xor(x, y)) -> br(x != y) // Transform br(xor(xor(x,y), 1)) -> br (x == y) - if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { - SDNode *TheXor = N1.getNode(); + if (N.getOpcode() == ISD::XOR) { + // Because we may call this on a speculatively constructed + // SimplifiedSetCC Node, we need to simplify this node first. + // Ideally this should be folded into SimplifySetCC and not + // here. For now, grab a handle to N so we don't lose it from + // replacements internal to the visit. + HandleSDNode XORHandle(N); + while (N.getOpcode() == ISD::XOR) { + SDValue Tmp = visitXOR(N.getNode()); + // No simplification done. + if (!Tmp.getNode()) + break; + // Returning N is a form of in-visit replacement that may invalidate + // N. Grab the value from the Handle. + if (Tmp.getNode() == N.getNode()) + N = XORHandle.getValue(); + else // Node simplified. Try simplifying again. + N = Tmp; + } + + if (N.getOpcode() != ISD::XOR) + return N; + + SDNode *TheXor = N.getNode(); + SDValue Op0 = TheXor->getOperand(0); SDValue Op1 = TheXor->getOperand(1); - if (Op0.getOpcode() == Op1.getOpcode()) { - // Avoid missing important xor optimizations. - if (SDValue Tmp = visitXOR(TheXor)) { - if (Tmp.getNode() != TheXor) { - DEBUG(dbgs() << "\nReplacing.8 "; - TheXor->dump(&DAG); - dbgs() << "\nWith: "; - Tmp.getNode()->dump(&DAG); - dbgs() << '\n'); - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - deleteAndRecombine(TheXor); - return DAG.getNode(ISD::BRCOND, SDLoc(N), - MVT::Other, Chain, Tmp, N2); - } - - // visitXOR has changed XOR's operands or replaced the XOR completely, - // bail out.
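The br(xor ...) rewrites in this hunk rest on two boolean identities: xor(x, y) is true exactly when x != y, and xoring that result with 1 flips it to x == y. A standalone truth-table check over i1-style values (illustrative only):

#include <cassert>

int main() {
  for (int X = 0; X <= 1; ++X)
    for (int Y = 0; Y <= 1; ++Y) {
      assert(((X ^ Y) != 0) == (X != Y));       // br(xor(x, y)) -> br(x != y)
      assert((((X ^ Y) ^ 1) != 0) == (X == Y)); // br(xor(xor(x, y), 1)) -> br(x == y)
    }
  return 0;
}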
- return SDValue(N, 0); - } - } if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; @@ -11208,19 +11963,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Equal = true; } - EVT SetCCVT = N1.getValueType(); + EVT SetCCVT = N.getValueType(); if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); - SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), - SetCCVT, - Op0, Op1, - Equal ? ISD::SETEQ : ISD::SETNE); // Replace the uses of XOR with SETCC - WorklistRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, SetCC); - deleteAndRecombine(N1.getNode()); - return DAG.getNode(ISD::BRCOND, SDLoc(N), - MVT::Other, Chain, SetCC, N2); + return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); } } @@ -11452,11 +12200,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; - DEBUG(dbgs() << "\nReplacing.4 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - Result.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -11621,11 +12366,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; - DEBUG(dbgs() << "\nReplacing.5 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - Result.getNode()->dump(&DAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); + dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); + dbgs() << '\n'); WorklistRemover DeadNodes(*this); if (isLoad) { DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); @@ -11649,7 +12392,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// \brief Return the base-pointer arithmetic from an indexed \p LD. +/// Return the base-pointer arithmetic from an indexed \p LD. SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { ISD::MemIndexedMode AM = LD->getAddressingMode(); assert(AM != ISD::UNINDEXED); @@ -11691,11 +12434,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // v3 = add v2, c // Now we replace use of chain2 with chain1. This makes the second load // isomorphic to the one we are deleting, and thus makes this load live. - DEBUG(dbgs() << "\nReplacing.6 "; - N->dump(&DAG); - dbgs() << "\nWith chain: "; - Chain.getNode()->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG); + dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG); + dbgs() << "\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); AddUsersToWorklist(Chain.getNode()); @@ -11726,11 +12467,9 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { AddUsersToWorklist(N); } else Index = DAG.getUNDEF(N->getValueType(1)); - DEBUG(dbgs() << "\nReplacing.7 "; - N->dump(&DAG); - dbgs() << "\nWith: "; - Undef.getNode()->dump(&DAG); - dbgs() << " and 2 other values\n"); + LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); + dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG); + dbgs() << " and 2 other values\n"); WorklistRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); @@ -11758,13 +12497,14 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. 
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getMemOperand()->getBaseAlignment()) { + if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) { SDValue NewLoad = DAG.getExtLoad( LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), Align, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - if (NewLoad.getNode() != N) - return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + // NewLoad will always be N as we are only refining the alignment + assert(NewLoad.getNode() == N); + (void)NewLoad; } } } @@ -11811,7 +12551,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { namespace { -/// \brief Helper structure used to slice a load in smaller loads. +/// Helper structure used to slice a load in smaller loads. /// Basically a slice is obtained from the following sequence: /// Origin = load Ty1, Base /// Shift = srl Ty1 Origin, CstTy Amount @@ -11824,7 +12564,7 @@ namespace { /// SliceTy is deduced from the number of bits that are actually used to /// build Inst. struct LoadedSlice { - /// \brief Helper structure used to compute the cost of a slice. + /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. bool ForCodeSize; @@ -11838,7 +12578,7 @@ struct LoadedSlice { Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} - /// \brief Get the cost of one isolated slice. + /// Get the cost of one isolated slice. Cost(const LoadedSlice &LS, bool ForCodeSize = false) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); @@ -11848,7 +12588,7 @@ struct LoadedSlice { ZExts = 1; } - /// \brief Account for slicing gain in the current cost. + /// Account for slicing gain in the current cost. /// Slicing provide a few gains like removing a shift or a /// truncate. This method allows to grow the cost of the original /// load with the gain from this slice. @@ -11921,7 +12661,7 @@ struct LoadedSlice { unsigned Shift = 0, SelectionDAG *DAG = nullptr) : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} - /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// Get the bits used in a chunk of bits \p BitWidth large. /// \return Result is \p BitWidth and has used bits set to 1 and /// not used bits set to 0. APInt getUsedBits() const { @@ -11941,14 +12681,14 @@ struct LoadedSlice { return UsedBits; } - /// \brief Get the size of the slice to be loaded in bytes. + /// Get the size of the slice to be loaded in bytes. unsigned getLoadedSize() const { unsigned SliceSize = getUsedBits().countPopulation(); assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); return SliceSize / 8; } - /// \brief Get the type that will be loaded for this slice. + /// Get the type that will be loaded for this slice. /// Note: This may not be the final type for the slice. EVT getLoadedType() const { assert(DAG && "Missing context"); @@ -11956,7 +12696,7 @@ struct LoadedSlice { return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); } - /// \brief Get the alignment of the load used for this slice. + /// Get the alignment of the load used for this slice. unsigned getAlignment() const { unsigned Alignment = Origin->getAlignment(); unsigned Offset = getOffsetFromBase(); @@ -11965,7 +12705,7 @@ struct LoadedSlice { return Alignment; } - /// \brief Check if this slice can be rewritten with legal operations. + /// Check if this slice can be rewritten with legal operations. 
bool isLegal() const { // An invalid slice is not legal. if (!Origin || !Inst || !DAG) @@ -12009,7 +12749,7 @@ struct LoadedSlice { return true; } - /// \brief Get the offset in bytes of this slice in the original chunk of + /// Get the offset in bytes of this slice in the original chunk of /// bits. /// \pre DAG != nullptr. uint64_t getOffsetFromBase() const { @@ -12030,7 +12770,7 @@ struct LoadedSlice { return Offset; } - /// \brief Generate the sequence of instructions to load the slice + /// Generate the sequence of instructions to load the slice /// represented by this object and redirect the uses of this slice to /// this new sequence of instructions. /// \pre this->Inst && this->Origin are valid Instructions and this @@ -12068,7 +12808,7 @@ struct LoadedSlice { return LastInst; } - /// \brief Check if this slice can be merged with an expensive cross register + /// Check if this slice can be merged with an expensive cross register /// bank copy. E.g., /// i = load i32 /// f = bitcast i32 i to float @@ -12117,7 +12857,7 @@ struct LoadedSlice { } // end anonymous namespace -/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// Check that all bits set in \p UsedBits form a dense region, i.e., /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { // If all the bits are one, this is dense! @@ -12133,7 +12873,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) { return NarrowedUsedBits.isAllOnesValue(); } -/// \brief Check whether or not \p First and \p Second are next to each other +/// Check whether or not \p First and \p Second are next to each other /// in memory. This means that there is no hole between the bits loaded /// by \p First and the bits loaded by \p Second. static bool areSlicesNextToEachOther(const LoadedSlice &First, @@ -12147,7 +12887,7 @@ static bool areSlicesNextToEachOther(const LoadedSlice &First, return areUsedBitsDense(UsedBits); } -/// \brief Adjust the \p GlobalLSCost according to the target +/// Adjust the \p GlobalLSCost according to the target /// pairing capabilities and the layout of the slices. /// \pre \p GlobalLSCost should account for at least as many loads as /// there are in the slices in \p LoadedSlices. @@ -12160,8 +12900,8 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, // Sort the slices so that elements that are likely to be next to each // other in memory are next to each other in the list. - std::sort(LoadedSlices.begin(), LoadedSlices.end(), - [](const LoadedSlice &LHS, const LoadedSlice &RHS) { + llvm::sort(LoadedSlices.begin(), LoadedSlices.end(), - [](const LoadedSlice &LHS, const LoadedSlice &RHS) { assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); }); @@ -12208,7 +12948,7 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, } } -/// \brief Check the profitability of all involved LoadedSlice. +/// Check the profitability of all involved LoadedSlice. /// Currently, it is considered profitable if there are exactly two /// involved slices (1) which are (2) next to each other in memory, and /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
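Concretely, the slicing machinery above rewrites one wide load whose value is consumed only as trunc and trunc(lshr) pieces into independent narrow loads at the matching offsets. A standalone byte-level sketch (plain C++, assuming a little-endian host; illustrative only):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t Origin = 0x1122334455667788ULL; // the wide loaded value
  unsigned char Mem[8];
  std::memcpy(Mem, &Origin, 8); // offsets below assume little-endian

  uint32_t LowViaShift = (uint32_t)Origin;          // trunc(x)
  uint32_t HighViaShift = (uint32_t)(Origin >> 32); // trunc(lshr(x, 32))

  uint32_t LowSlice, HighSlice; // the two narrow replacement loads
  std::memcpy(&LowSlice, Mem + 0, 4);
  std::memcpy(&HighSlice, Mem + 4, 4);

  assert(LowSlice == LowViaShift && HighSlice == HighViaShift);
  return 0;
}

The Cost bookkeeping above exists because the rewrite trades one load plus shifts and truncates for several loads, which is only a win when the shifts disappear and the target can pair the narrow accesses.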
@@ -12252,7 +12992,7 @@ static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, return OrigCost > GlobalSlicingCost; } -/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// If the given load, \p LI, is used only by trunc or trunc(lshr) /// operations, split it in the various pieces being extracted. /// /// This sort of thing is introduced by SROA. @@ -12371,22 +13111,6 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. - // The store should be chained directly to the load or be an operand of a - // tokenfactor. - if (LD == Chain.getNode()) - ; // ok. - else if (Chain->getOpcode() != ISD::TokenFactor) - return Result; // Fail. - else { - bool isOk = false; - for (const SDValue &ChainOp : Chain->op_values()) - if (ChainOp.getNode() == LD) { - isOk = true; - break; - } - if (!isOk) return Result; - } - // This only handles simple types. if (V.getValueType() != MVT::i16 && V.getValueType() != MVT::i32 && @@ -12423,6 +13147,24 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { // is aligned the same as the access width. if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; + + // For narrowing to be valid, it must be the case that the load is the + // memory operation immediately preceding the store. + if (LD == Chain.getNode()) + ; // ok. + else if (Chain->getOpcode() == ISD::TokenFactor && + SDValue(LD, 1).hasOneUse()) { + // LD has only 1 chain use, so there are no indirect dependencies. + bool isOk = false; + for (const SDValue &ChainOp : Chain->op_values()) + if (ChainOp.getNode() == LD) { + isOk = true; + break; + } + if (!isOk) + return Result; + } else + return Result; // Fail.
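The chain check just rewritten is what makes the narrowing safe: once the load is known to be the memory operation immediately preceding the store, writing (load & ~M) | (y & M) back to the same address can only change the bytes covered by M. A standalone sketch for an 0xFF00 byte mask (assuming a little-endian host; illustrative only):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t Y = 0xDEADBEEF;

  uint32_t WideMem = 0x11223344; // wide load / mask / or / wide store
  WideMem = (WideMem & ~0xFF00u) | (Y & 0xFF00u);

  uint32_t NarrowMem = 0x11223344; // narrowed form: one byte store
  unsigned char Bytes[4];
  std::memcpy(Bytes, &NarrowMem, 4);  // byte 1 holds bits 8..15 (LE)
  Bytes[1] = (unsigned char)(Y >> 8);
  std::memcpy(&NarrowMem, Bytes, 4);

  assert(WideMem == NarrowMem); // both end up 0x1122BE44
  return 0;
}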
+ Result.first = MaskedBytes; Result.second = NotMaskTZ/8; return Result; @@ -12741,12 +13483,6 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, return false; } -static SDValue peekThroughBitcast(SDValue V) { - while (V.getOpcode() == ISD::BITCAST) - V = V.getOperand(0); - return V; -} - SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { SmallVector<SDValue, 8> Chains; @@ -12871,6 +13607,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); SDValue Val = St->getValue(); + Val = peekThroughBitcast(Val); StoreInt <<= ElementSizeBits; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { StoreInt |= C->getAPIntValue() @@ -12903,13 +13640,13 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( FirstInChain->getPointerInfo(), FirstInChain->getAlignment()); } else { // Must be realized as a trunc store - EVT LegalizedStoredValueTy = + EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); - unsigned LegalizedStoreSize = LegalizedStoredValueTy.getSizeInBits(); + unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits(); ConstantSDNode *C = cast<ConstantSDNode>(StoredVal); SDValue ExtendedStoreVal = DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL, - LegalizedStoredValueTy); + LegalizedStoredValTy); NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, @@ -12926,10 +13663,11 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts( } void DAGCombiner::getStoreMergeCandidates( - StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes) { + StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, + SDNode *&RootNode) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); EVT MemVT = St->getMemoryVT(); SDValue Val = peekThroughBitcast(St->getValue()); @@ -12950,11 +13688,17 @@ void DAGCombiner::getStoreMergeCandidates( EVT LoadVT; if (IsLoadSrc) { auto *Ld = cast<LoadSDNode>(Val); - LBasePtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + LBasePtr = BaseIndexOffset::match(Ld, DAG); LoadVT = Ld->getMemoryVT(); // Load and store should be the same type. if (MemVT != LoadVT) return; + // Loads must only have one use. + if (!Ld->hasNUsesOfValue(1, 0)) + return; + // The memory operands must not be volatile. + if (Ld->isVolatile() || Ld->isIndexed()) + return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { @@ -12969,9 +13713,15 @@ void DAGCombiner::getStoreMergeCandidates( return false; // The Load's Base Ptr must also match if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) { - auto LPtr = BaseIndexOffset::match(OtherLd->getBasePtr(), DAG); + auto LPtr = BaseIndexOffset::match(OtherLd, DAG); if (LoadVT != OtherLd->getMemoryVT()) return false; + // Loads must only have one use. + if (!OtherLd->hasNUsesOfValue(1, 0)) + return false; + // The memory operands must not be volatile. 
+ if (OtherLd->isVolatile() || OtherLd->isIndexed()) + return false; if (!(LBasePtr.equalBaseIndex(LPtr, DAG))) return false; } else @@ -12993,7 +13743,7 @@ void DAGCombiner::getStoreMergeCandidates( Val.getOpcode() != ISD::EXTRACT_SUBVECTOR) return false; } - Ptr = BaseIndexOffset::match(Other->getBasePtr(), DAG); + Ptr = BaseIndexOffset::match(Other, DAG); return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; @@ -13013,7 +13763,7 @@ void DAGCombiner::getStoreMergeCandidates( // FIXME: We should be able to climb and // descend TokenFactors to find candidates as well. - SDNode *RootNode = (St->getChain()).getNode(); + RootNode = St->getChain().getNode(); if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) { RootNode = Ldn->getChain().getNode(); @@ -13044,31 +13794,54 @@ void DAGCombiner::getStoreMergeCandidates( // through the chain). Check in parallel by searching up from // non-chain operands of candidates. bool DAGCombiner::checkMergeStoreCandidatesForDependencies( - SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores) { + SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, + SDNode *RootNode) { // FIXME: We should be able to truncate a full search of // predecessors by doing a BFS and keeping tabs on the originating // stores from which worklist nodes come, in a similar way to // TokenFactor simplification. - SmallPtrSet<const SDNode *, 16> Visited; + SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 8> Worklist; - unsigned int Max = 8192; + + // RootNode is a predecessor to all candidates so we need not search + // past it. Add RootNode (peeking through TokenFactors). Do not count + // these towards the size check. + + Worklist.push_back(RootNode); + while (!Worklist.empty()) { + auto N = Worklist.pop_back_val(); + if (N->getOpcode() == ISD::TokenFactor) { + for (SDValue Op : N->ops()) + Worklist.push_back(Op.getNode()); + } + Visited.insert(N); + } + + // Don't count pruning nodes towards max. + unsigned int Max = 1024 + Visited.size(); // Search Ops of store candidates. for (unsigned i = 0; i < NumStores; ++i) { - SDNode *n = StoreNodes[i].MemNode; - // Potential loops may happen only through non-chain operands - for (unsigned j = 1; j < n->getNumOperands(); ++j) - Worklist.push_back(n->getOperand(j).getNode()); + SDNode *N = StoreNodes[i].MemNode; + // Of the 4 Store Operands: + // * Chain (Op 0) -> We have already considered these + // in candidate selection and can be + // safely ignored + // * Value (Op 1) -> Cycles may happen (e.g. through load chains) + // * Address (Op 2) -> Merged addresses may only vary by a fixed constant + // and so no cycles are possible. + // * (Op 3) -> appears to always be undef. Cannot be source of cycle. + // + // Thus we need only check predecessors of the value operands. + auto *Op = N->getOperand(1).getNode(); + if (Visited.insert(Op).second) + Worklist.push_back(Op); } // Search through DAG. We can stop early if we find a store node. - for (unsigned i = 0; i < NumStores; ++i) { + for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, Max)) return false; - // Check if we ended early, failing conservatively if so.
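The comment block above explains the pruning: the visited set is pre-seeded with the common chain root (looking through TokenFactors) so the upward walk from each value operand stops at that frontier instead of burning search budget on it. A generic standalone sketch of the idea; the Node type and helper are hypothetical stand-ins, not SDNode or SDNode::hasPredecessorHelper:

#include <cassert>
#include <set>
#include <vector>

struct Node { std::vector<Node *> Operands; };

// Walk upward from Start looking for Target, skipping anything already in
// Visited; pre-seeded entries act as a stop frontier and, mirroring the
// code above, a budget overrun conservatively reports a dependency.
static bool reachesTarget(Node *Start, Node *Target,
                          std::set<Node *> &Visited, unsigned Budget) {
  std::vector<Node *> Worklist{Start};
  while (!Worklist.empty()) {
    if (Visited.size() > Budget)
      return true; // out of budget: fail conservatively
    Node *N = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(N).second)
      continue; // pruned (pre-seeded) or already seen
    if (N == Target)
      return true;
    for (Node *Op : N->Operands)
      Worklist.push_back(Op);
  }
  return false;
}

int main() {
  Node Root, LoadA, StoreA, ValB;
  LoadA.Operands = {&Root};
  StoreA.Operands = {&LoadA};
  ValB.Operands = {&Root};

  std::set<Node *> Visited{&Root}; // stop at the common chain root
  // Safe to merge: StoreA is not a predecessor of the other store's value.
  assert(!reachesTarget(&ValB, &StoreA, Visited, 1024));
  return 0;
}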
- if (Visited.size() >= Max) - return false; - } return true; } @@ -13106,8 +13879,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { return false; SmallVector<MemOpLink, 8> StoreNodes; + SDNode *RootNode; // Find potential store merge candidates by searching through chain sub-DAG - getStoreMergeCandidates(St, StoreNodes); + getStoreMergeCandidates(St, StoreNodes, RootNode); // Check if there is anything to merge. if (StoreNodes.size() < 2) @@ -13115,10 +13889,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Sort the memory operands according to their distance from the // base pointer. - std::sort(StoreNodes.begin(), StoreNodes.end(), - [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - }); + llvm::sort(StoreNodes.begin(), StoreNodes.end(), + [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); // Store Merge attempts to merge the lowest stores. This generally // works out as if successful, as the remaining stores are checked @@ -13162,178 +13936,191 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { continue; } - // Check that we can merge these candidates without causing a cycle - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, - NumConsecutiveStores)) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumConsecutiveStores); - continue; - } - // The node with the lowest store address. LLVMContext &Context = *DAG.getContext(); const DataLayout &DL = DAG.getDataLayout(); // Store the constants into memory as one consecutive store. if (IsConstantSrc) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned LastLegalType = 1; - unsigned LastLegalVectorType = 1; - bool LastIntegerTrunc = false; - bool NonZero = false; - unsigned FirstZeroAfterNonZero = NumConsecutiveStores; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StoredVal = ST->getValue(); - bool IsElementZero = false; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) - IsElementZero = C->isNullValue(); - else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) - IsElementZero = C->getConstantFPValue()->isNullValue(); - if (IsElementZero) { - if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) - FirstZeroAfterNonZero = i; - } - NonZero |= !IsElementZero; + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; + bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; + } + NonZero |= !IsElementZero; - // Find a legal type for the constant store. 
- unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFast) && - IsFast) { - LastIntegerTrunc = false; - LastLegalType = i + 1; - // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValueTy = - TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; + + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; + + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFast) && IsFast) { - LastIntegerTrunc = true; + LastIntegerTrunc = false; LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) { + LastIntegerTrunc = true; + LastLegalType = i + 1; + } } - } - // We only use vectors if the constant is known to be zero or the target - // allows it and the function is not marked with the noimplicitfloat - // attribute. - if ((!NonZero || - TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && - !NoVectors) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && - IsFast) - LastLegalVectorType = i + 1; + // We only use vectors if the constant is known to be zero or the + // target allows it and the function is not marked with the + // noimplicitfloat attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } } - } - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + + // Check if we found a legal integer type that creates a meaningful + // merge. 
+ if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Check if we found a legal integer type that creates a meaningful merge. - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved or we've dropped a non-zero value. Drop as many - // candidates as we can here. - unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (NumSkip < FirstZeroAfterNonZero) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) { - NumSkip++; + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; } - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - continue; - } - bool Merged = MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - RV |= Merged; + RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, + UseVector, LastIntegerTrunc); - // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } continue; } // When extracting multiple vector elements, try to store them // in one vector store rather than a sequence of scalar stores. if (IsExtractVecSrc) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 1; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StVal = peekThroughBitcast(St->getValue()); - // This restriction could be loosened. - // Bail out if any stored values are not elements extracted from a - // vector. It should be possible to handle mixed sources, but load - // sources need more careful handling (see the block of code below that - // handles consecutive loads). - if (StVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT && - StVal.getOpcode() != ISD::EXTRACT_SUBVECTOR) - return RV; + // Loop on Consecutive Stores on success. 
+ while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 1; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = + EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast; - if (TLI.isTypeLegal(Ty) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, - FirstStoreAlign, &IsFast) && - IsFast) - NumStoresToMerge = i + 1; - } + // Break early when size is too large to be legal. + if (Ty.getSizeInBits() > MaximumLegalStoreInBits) + break; - // Check if we found a legal integer type that creates a meaningful merge. - if (NumStoresToMerge < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved. Drop as many candidates as we can here. - unsigned NumSkip = 1; - while ((NumSkip < NumConsecutiveStores) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && + IsFast) + NumStoresToMerge = i + 1; + } - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - continue; - } + // Check if we found a legal integer type creating a meaningful + // merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. + unsigned NumSkip = 1; + while ( + (NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } + + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies( + StoreNodes, NumStoresToMerge, RootNode)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + continue; + } + + RV |= MergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true, false); - bool Merged = MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true, false); - if (!Merged) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); - continue; + NumConsecutiveStores -= NumStoresToMerge; } - // Remove merged stores for next iteration. 
- StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - RV = true; continue; } @@ -13347,26 +14134,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. BaseIndexOffset LdBasePtr; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); SDValue Val = peekThroughBitcast(St->getValue()); - LoadSDNode *Ld = dyn_cast<LoadSDNode>(Val); - if (!Ld) - break; + LoadSDNode *Ld = cast<LoadSDNode>(Val); - // Loads must only have one use. - if (!Ld->hasNUsesOfValue(1, 0)) - break; - - // The memory operands must not be volatile. - if (Ld->isVolatile() || Ld->isIndexed()) - break; - - // The stored memory type must be the same. - if (Ld->getMemoryVT() != MemVT) - break; - - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); // If this is not the first ptr that we check. int64_t LdOffset = 0; if (LdBasePtr.getBase().getNode()) { @@ -13382,90 +14156,75 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { LoadNodes.push_back(MemOpLink(Ld, LdOffset)); } - if (LoadNodes.size() < 2) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); - continue; - } + while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + // If we have load/store pair instructions and we only have two values, + // don't bother merging. + unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && + TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); + break; + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); - // If we have load/store pair instructions and we only have two values, - // don't bother merging. - unsigned RequiredAlignment; - if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && - StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); - continue; - } - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); - unsigned FirstLoadAS = FirstLoad->getAddressSpace(); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. These variables hold the index in + // the store node array. - // Scan the memory operations on the chain and find the first - // non-consecutive load memory address. These variables hold the index in - // the store node array. - unsigned LastConsecutiveLoad = 1; - // This variable refers to the size and not index in the array. 
- unsigned LastLegalVectorType = 1; - unsigned LastLegalIntegerType = 1; - bool isDereferenceable = true; - bool DoIntegerTruncate = false; - StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = FirstLoad->getChain(); - for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads must share the same chain. - if (LoadNodes[i].MemNode->getChain() != FirstChain) - break; + unsigned LastConsecutiveLoad = 1; - int64_t CurrAddress = LoadNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; - - if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) - isDereferenceable = false; - - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - - bool IsFastSt, IsFastLd; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && - IsFastLd) { - LastLegalVectorType = i + 1; - } + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; + bool DoIntegerTruncate = false; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. + if (LoadNodes[i].MemNode->getChain() != FirstChain) + break; + + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; - // Find a legal type for the integer store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(Context, SizeInBits); - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, - FirstStoreAlign, &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, - FirstLoadAlign, &IsFastLd) && - IsFastLd) { - LastLegalIntegerType = i + 1; - DoIntegerTruncate = false; - // Or check whether a truncstore and extload is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValueTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValueTy, DAG) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) + isDereferenceable = false; + + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + + // Break early when size is too large to be legal. 
+ if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; + + bool IsFastSt, IsFastLd; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) { + LastLegalVectorType = i + 1; + } + + // Find a legal type for the integer store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, FirstStoreAlign, &IsFastSt) && IsFastSt && @@ -13473,105 +14232,140 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { FirstLoadAlign, &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; - DoIntegerTruncate = true; + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, + StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, + StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && + IsFastLd) { + LastLegalIntegerType = i + 1; + DoIntegerTruncate = true; + } } } - } - // Only use vector types if the vector type is larger than the integer type. - // If they are the same, use integers. - bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; - unsigned LastLegalType = - std::max(LastLegalVectorType, LastLegalIntegerType); - - // We add +1 here because the LastXXX variables refer to location while - // the NumElem refers to array/index size. - unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); - NumElem = std::min(LastLegalType, NumElem); - - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have is if the alignment or either - // the load or store has improved. Drop as many candidates as we - // can here. - unsigned NumSkip = 1; - while ((NumSkip < LoadNodes.size()) && - (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - continue; - } + // Only use vector types if the vector type is larger than the integer + // type. If they are the same, use integers. + bool UseVectorTy = + LastLegalVectorType > LastLegalIntegerType && !NoVectors; + unsigned LastLegalType = + std::max(LastLegalVectorType, LastLegalIntegerType); - // Find if it is better to use vectors or integers to load and store - // to memory. - EVT JointMemOpVT; - if (UseVectorTy) { - // Find a legal type for the vector store. 
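// The load scan above extends the run only while each load's offset from
// the first advances by exactly one element. A standalone model of that
// consecutiveness test (offsets and element size are hypothetical):
#include <cstdio>

int main() {
  const long ElementSizeBytes = 4;
  // Offsets-from-base of the candidate loads; 20 breaks the run.
  const long Offset[] = {0, 4, 8, 20, 24};
  const long StartAddress = Offset[0];

  unsigned LastConsecutiveLoad = 0;
  for (unsigned i = 1; i < 5; ++i) {
    if (Offset[i] - StartAddress != ElementSizeBytes * (long)i)
      break; // gap or overlap: the consecutive run ends here
    LastConsecutiveLoad = i;
  }
  std::printf("loads 0..%u are consecutive\n", LastConsecutiveLoad);
}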
- unsigned Elts = NumElem * NumMemElts; - JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - } else { - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); - } - - SDLoc LoadDL(LoadNodes[0].MemNode); - SDLoc StoreDL(StoreNodes[0].MemNode); - - // The merged loads are required to have the same incoming chain, so - // using the first's chain is acceptable. - - SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); - - MachineMemOperand::Flags MMOFlags = isDereferenceable ? - MachineMemOperand::MODereferenceable: - MachineMemOperand::MONone; - - SDValue NewLoad, NewStore; - if (UseVectorTy || !DoIntegerTruncate) { - NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), FirstLoadAlign, - MMOFlags); - NewStore = DAG.getStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign); - } else { // This must be the truncstore/extload case - EVT ExtendedTy = - TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); - NewLoad = - DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, FirstLoad->getChain(), - FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), - JointMemOpVT, FirstLoadAlign, MMOFlags); - NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), JointMemOpVT, - FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); - } - - // Transfer chain users from old loads to the new load. - for (unsigned i = 0; i < NumElem; ++i) { - LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - } - - // Replace the all stores with the new store. Recursively remove - // corresponding value if its no longer used. - for (unsigned i = 0; i < NumElem; ++i) { - SDValue Val = StoreNodes[i].MemNode->getOperand(1); - CombineTo(StoreNodes[i].MemNode, NewStore); - if (Val.getNode()->use_empty()) - recursivelyDeleteUnusedNodes(Val.getNode()); - } - - RV = true; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = + std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); + NumElem = std::min(LastLegalType, NumElem); + + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } + + // Check that we can merge these candidates without causing a cycle. 
+          if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
+                                                        RootNode)) {
+            StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+            LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+            NumConsecutiveStores -= NumElem;
+            continue;
+          }
+
+          // Find if it is better to use vectors or integers to load and store
+          // to memory.
+          EVT JointMemOpVT;
+          if (UseVectorTy) {
+            // Find a legal type for the vector store.
+            unsigned Elts = NumElem * NumMemElts;
+            JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
+          } else {
+            unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
+            JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
+          }
+
+          SDLoc LoadDL(LoadNodes[0].MemNode);
+          SDLoc StoreDL(StoreNodes[0].MemNode);
+
+          // The merged loads are required to have the same incoming chain, so
+          // using the first's chain is acceptable.
+
+          SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
+          AddToWorklist(NewStoreChain.getNode());
+
+          MachineMemOperand::Flags MMOFlags =
+              isDereferenceable ? MachineMemOperand::MODereferenceable
+                                : MachineMemOperand::MONone;
+
+          SDValue NewLoad, NewStore;
+          if (UseVectorTy || !DoIntegerTruncate) {
+            NewLoad =
+                DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
+                            FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
+                            FirstLoadAlign, MMOFlags);
+            NewStore = DAG.getStore(
+                NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+                FirstInChain->getPointerInfo(), FirstStoreAlign);
+          } else { // This must be the truncstore/extload case
+            EVT ExtendedTy =
+                TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
+            NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
+                                     FirstLoad->getChain(), FirstLoad->getBasePtr(),
+                                     FirstLoad->getPointerInfo(), JointMemOpVT,
+                                     FirstLoadAlign, MMOFlags);
+            NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
+                                         FirstInChain->getBasePtr(),
+                                         FirstInChain->getPointerInfo(),
+                                         JointMemOpVT, FirstInChain->getAlignment(),
+                                         FirstInChain->getMemOperand()->getFlags());
+          }
+
+          // Transfer chain users from old loads to the new load.
+          for (unsigned i = 0; i < NumElem; ++i) {
+            LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
+            DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
+                                          SDValue(NewLoad.getNode(), 1));
+          }
+
+          // Replace all the stores with the new store. Recursively remove the
+          // corresponding value if it's no longer used.
+          for (unsigned i = 0; i < NumElem; ++i) {
+            SDValue Val = StoreNodes[i].MemNode->getOperand(1);
+            CombineTo(StoreNodes[i].MemNode, NewStore);
+            if (Val.getNode()->use_empty())
+              recursivelyDeleteUnusedNodes(Val.getNode());
+          }
+
+          RV = true;
+          StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
+          LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
+          NumConsecutiveStores -= NumElem;
+        }
   }
   return RV;
 }
@@ -13713,13 +14507,14 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   // Try to infer better alignment information than the store already has.
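// How the merge size above falls out of the three limits (a standalone
// sketch; the counts are hypothetical scan results, not LLVM API). Note the
// +1: LastConsecutiveLoad is an index, while NumElem is a count:
#include <algorithm>
#include <cstdio>

int main() {
  unsigned LastLegalVectorType = 4, LastLegalIntegerType = 2; // counts
  unsigned NumConsecutiveStores = 6, LastConsecutiveLoad = 4; // index
  bool NoVectors = false;

  bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
  unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
  unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
  NumElem = std::min(LastLegalType, NumElem);

  std::printf("merge %u elements as %s\n", NumElem,
              UseVectorTy ? "one vector op" : "one integer op");
}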
   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
-      if (Align > ST->getAlignment()) {
+      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
         SDValue NewStore =
             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                               ST->getMemoryVT(), Align,
                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
-        if (NewStore.getNode() != N)
-          return CombineTo(ST, NewStore, true);
+        // NewStore will always be N as we are only refining the alignment
+        assert(NewStore.getNode() == N);
+        (void)NewStore;
       }
     }
   }
@@ -13783,30 +14578,30 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     }
   }
 
-  // Deal with elidable overlapping chained stores.
-  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain))
-    if (OptLevel != CodeGenOpt::None && ST->isUnindexed() &&
-        ST1->isUnindexed() && !ST1->isVolatile() && ST1->hasOneUse() &&
-        !ST1->getBasePtr().isUndef() && !ST->isVolatile()) {
-      BaseIndexOffset STBasePtr = BaseIndexOffset::match(ST->getBasePtr(), DAG);
-      BaseIndexOffset ST1BasePtr =
-          BaseIndexOffset::match(ST1->getBasePtr(), DAG);
-      unsigned STBytes = ST->getMemoryVT().getStoreSize();
-      unsigned ST1Bytes = ST1->getMemoryVT().getStoreSize();
-      int64_t PtrDiff;
-      // If this is a store who's preceeding store to a subset of the same
-      // memory and no one other node is chained to that store we can
-      // effectively drop the store. Do not remove stores to undef as they may
-      // be used as data sinks.
-
-      if (((ST->getBasePtr() == ST1->getBasePtr()) &&
-           (ST->getValue() == ST1->getValue())) ||
-          (STBasePtr.equalBaseIndex(ST1BasePtr, DAG, PtrDiff) &&
-           (0 <= PtrDiff) && (PtrDiff + ST1Bytes <= STBytes))) {
+  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
+    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
+        !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
+        ST->getMemoryVT() == ST1->getMemoryVT()) {
+      // If this is a store followed by a store with the same value to the same
+      // location, then the store is dead/noop.
+      if (ST1->getValue() == Value) {
+        // The store is dead, remove it.
+        return Chain;
+      }
+
+      // If this store's preceding store is to the same location and no other
+      // node is chained to that store, we can effectively drop the store.
+      // Do not remove stores to undef as they may be used as data sinks.
+      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
+          !ST1->getBasePtr().isUndef()) {
+        // ST1 is fully overwritten and can be elided. Combine with its chain
+        // value.
         CombineTo(ST1, ST1->getChain());
-        return SDValue(N, 0);
+        return SDValue();
       }
     }
+  }
 
   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
   // truncating store.  We can do this even if this is already a truncstore.
@@ -14201,6 +14996,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   SDValue EltNo = N->getOperand(1);
   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
 
+  // extract_vector_elt of out-of-bounds element -> UNDEF
+  if (ConstEltNo && ConstEltNo->getAPIntValue().uge(VT.getVectorNumElements()))
+    return DAG.getUNDEF(NVT);
+
   // extract_vector_elt (build_vector x, y), 1 -> y
   if (ConstEltNo &&
       InVec.getOpcode() == ISD::BUILD_VECTOR &&
@@ -14286,6 +15085,23 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
     }
   }
 
+  // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
+  // simplify it based on the (valid) extraction indices.
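// The two chained-store eliminations above, distilled into a toy model
// (plain structs, not SDNodes; addresses and values are hypothetical):
// same value to the same slot makes the later store a no-op, while a
// different value means the earlier store was never observed.
#include <cstdio>

struct Store { long Addr; int Width; long Value; };

// Returns 1 if the later store is dead, 2 if the earlier store can be
// elided, 0 if the pair is left alone.
static int classify(const Store &Earlier, const Store &Later) {
  if (Earlier.Addr != Later.Addr || Earlier.Width != Later.Width)
    return 0;
  if (Earlier.Value == Later.Value)
    return 1; // same value to same location: later store is a no-op
  return 2;   // earlier store fully overwritten: elide it
}

int main() {
  Store A{0x100, 8, 42}, B{0x100, 8, 42}, C{0x100, 8, 7};
  std::printf("%d %d\n", classify(A, B), classify(A, C)); // prints "1 2"
}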
+  if (llvm::all_of(InVec->uses(), [&](SDNode *Use) {
+        return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+               Use->getOperand(0) == InVec &&
+               isa<ConstantSDNode>(Use->getOperand(1));
+      })) {
+    APInt DemandedElts = APInt::getNullValue(VT.getVectorNumElements());
+    for (SDNode *Use : InVec->uses()) {
+      auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
+      if (CstElt->getAPIntValue().ult(VT.getVectorNumElements()))
+        DemandedElts.setBit(CstElt->getZExtValue());
+    }
+    if (SimplifyDemandedVectorElts(InVec, DemandedElts, true))
+      return SDValue(N, 0);
+  }
+
   bool BCNumEltsChanged = false;
   EVT ExtVT = VT.getVectorElementType();
   EVT LVT = ExtVT;
@@ -14492,7 +15308,10 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
   assert(VecVT.getSizeInBits() == VT.getSizeInBits() && "Invalid vector size");
   // Check if the new vector type is legal.
-  if (!isTypeLegal(VecVT)) return SDValue();
+  if (!isTypeLegal(VecVT) ||
+      (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
+       TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
+    return SDValue();
 
   // Make the new BUILD_VECTOR.
   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
@@ -14739,12 +15558,16 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
     }
 
     // Not an undef or zero. If the input is something other than an
-    // EXTRACT_VECTOR_ELT with a constant index, bail out.
+    // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
         !isa<ConstantSDNode>(Op.getOperand(1)))
       return SDValue();
 
     SDValue ExtractedFromVec = Op.getOperand(0);
+    APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+    if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
+      return SDValue();
+
     // All inputs must have the same element type as the output.
     if (VT.getVectorElementType() !=
         ExtractedFromVec.getValueType().getVectorElementType())
@@ -14900,6 +15723,54 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
   return Shuffles[0];
 }
 
+// Try to turn a build vector of zero extends of extract vector elts into a
+// vector zero extend and possibly an extract subvector.
+// TODO: Support sign extend or any extend?
+// TODO: Allow undef elements?
+// TODO: Don't require the extracts to start at element 0.
+SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
+  if (LegalOperations)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  SDValue Op0 = N->getOperand(0);
+  auto checkElem = [&](SDValue Op) -> int64_t {
+    if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+        Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+        Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
+      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
+        return C->getZExtValue();
+    return -1;
+  };
+
+  // Make sure the first element matches
+  // (zext (extract_vector_elt X, C))
+  int64_t Offset = checkElem(Op0);
+  if (Offset < 0)
+    return SDValue();
+
+  unsigned NumElems = N->getNumOperands();
+  SDValue In = Op0.getOperand(0).getOperand(0);
+  EVT InSVT = In.getValueType().getScalarType();
+  EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
+
+  // Don't create an illegal input type after type legalization.
+  if (LegalTypes && !TLI.isTypeLegal(InVT))
+    return SDValue();
+
+  // Ensure all the elements come from the same vector and are adjacent.
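// The adjacency test that follows, modeled standalone: each build_vector
// operand must be zext(extract(X, Offset + i)). Here checkElem's results
// are precomputed hypothetical indices rather than DAG pattern matches.
#include <cstdio>

int main() {
  // Extract index per operand, or -1 if the operand didn't match the pattern.
  const long Elem[] = {2, 3, 4, 5};
  const unsigned NumElems = 4;

  long Offset = Elem[0];
  bool Match = Offset >= 0;
  for (unsigned i = 1; Match && i != NumElems; ++i)
    Match = (Offset + (long)i == Elem[i]); // adjacent and in order

  std::printf(Match ? "fold to (zext (extract_subvector X, %ld))\n"
                    : "no fold\n",
              Offset);
}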
+ for (unsigned i = 1; i != NumElems; ++i) { + if ((Offset + i) != checkElem(N->getOperand(i))) + return SDValue(); + } + + SDLoc DL(N); + In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In, + Op0.getOperand(0).getOperand(1)); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In); +} + SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -14907,6 +15778,32 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (ISD::allOperandsUndef(N)) return DAG.getUNDEF(VT); + // If this is a splat of a bitcast from another vector, change to a + // concat_vector. + // For example: + // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) -> + // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X)))) + // + // If X is a build_vector itself, the concat can become a larger build_vector. + // TODO: Maybe this is useful for non-splat too? + if (!LegalOperations) { + if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) { + Splat = peekThroughBitcast(Splat); + EVT SrcVT = Splat.getValueType(); + if (SrcVT.isVector()) { + unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getVectorElementType(), NumElts); + if (!LegalTypes || TLI.isTypeLegal(NewVT)) { + SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), + NewVT, Ops); + return DAG.getBitcast(VT, Concat); + } + } + } + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -14936,6 +15833,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Op0.getOperand(0), Op0.getOperand(1)); } + if (SDValue V = convertBuildVecZextToZext(N)) + return V; + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; @@ -15125,6 +16025,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) return SDValue(); + // Bail out if the vector size is not a multiple of the scalar size. + if (VT.getSizeInBits() % SclTy.getSizeInBits()) + return SDValue(); + unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits(); if (VNTNumElms < 2) return SDValue(); @@ -15403,13 +16307,22 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Only do this if we won't split any elements. if (ExtractSize % EltSize == 0) { unsigned NumElems = ExtractSize / EltSize; - EVT ExtractVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElems); - if ((!LegalOperations || - TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT)) && + EVT EltVT = InVT.getVectorElementType(); + EVT ExtractVT = NumElems == 1 ? EltVT : + EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); + if ((Level < AfterLegalizeDAG || + (NumElems == 1 || + TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) / EltSize; + if (NumElems == 1) { + SDValue Src = V->getOperand(IdxVal); + if (EltVT != Src.getValueType()) + Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); + + return DAG.getBitcast(NVT, Src); + } // Extract the pieces from the original build_vector. 
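// The EXTRACT_SUBVECTOR-of-BUILD_VECTOR path above converts the subvector
// index into an operand index of the source build_vector. A standalone
// model of that arithmetic with hypothetical type widths:
#include <cstdio>

int main() {
  // Extracting two 32-bit lanes (NVT) from a bitcast of a v8i16
  // build_vector (EltSize = 16), starting at subvector index 2.
  const unsigned NVTScalarBits = 32, EltSize = 16, Idx = 2;
  const unsigned ExtractSize = 2 * NVTScalarBits; // 64 bits requested

  if (ExtractSize % EltSize == 0) {
    unsigned NumElems = ExtractSize / EltSize;        // 4 source operands
    unsigned IdxVal = Idx * NVTScalarBits / EltSize;  // first operand index
    std::printf("take %u build_vector operands starting at %u\n", NumElems,
                IdxVal);
  }
}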
SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), @@ -15451,122 +16364,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) return NarrowBOp; - return SDValue(); -} - -static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, - SDValue V, SelectionDAG &DAG) { - SDLoc DL(V); - EVT VT = V.getValueType(); - - switch (V.getOpcode()) { - default: - return V; - - case ISD::CONCAT_VECTORS: { - EVT OpVT = V->getOperand(0).getValueType(); - int OpSize = OpVT.getVectorNumElements(); - SmallBitVector OpUsedElements(OpSize, false); - bool FoundSimplification = false; - SmallVector<SDValue, 4> NewOps; - NewOps.reserve(V->getNumOperands()); - for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { - SDValue Op = V->getOperand(i); - bool OpUsed = false; - for (int j = 0; j < OpSize; ++j) - if (UsedElements[i * OpSize + j]) { - OpUsedElements[j] = true; - OpUsed = true; - } - NewOps.push_back( - OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) - : DAG.getUNDEF(OpVT)); - FoundSimplification |= Op == NewOps.back(); - OpUsedElements.reset(); - } - if (FoundSimplification) - V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); - return V; - } - - case ISD::INSERT_SUBVECTOR: { - SDValue BaseV = V->getOperand(0); - SDValue SubV = V->getOperand(1); - auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); - if (!IdxN) - return V; - - int SubSize = SubV.getValueType().getVectorNumElements(); - int Idx = IdxN->getZExtValue(); - bool SubVectorUsed = false; - SmallBitVector SubUsedElements(SubSize, false); - for (int i = 0; i < SubSize; ++i) - if (UsedElements[i + Idx]) { - SubVectorUsed = true; - SubUsedElements[i] = true; - UsedElements[i + Idx] = false; - } - - // Now recurse on both the base and sub vectors. - SDValue SimplifiedSubV = - SubVectorUsed - ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) - : DAG.getUNDEF(SubV.getValueType()); - SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); - if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) - V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); - return V; - } - } -} - -static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, - SDValue N1, SelectionDAG &DAG) { - EVT VT = SVN->getValueType(0); - int NumElts = VT.getVectorNumElements(); - SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); - for (int M : SVN->getMask()) - if (M >= 0 && M < NumElts) - N0UsedElements[M] = true; - else if (M >= NumElts) - N1UsedElements[M - NumElts] = true; - - SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); - SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); - if (S0 == N0 && S1 == N1) - return SDValue(); - - return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); -} - -static SDValue simplifyShuffleMask(ShuffleVectorSDNode *SVN, SDValue N0, - SDValue N1, SelectionDAG &DAG) { - auto isUndefElt = [](SDValue V, int Idx) { - // TODO - handle more cases as required. 
- if (V.getOpcode() == ISD::BUILD_VECTOR) - return V.getOperand(Idx).isUndef(); - if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) - return (Idx != 0) || V.getOperand(0).isUndef(); - return false; - }; - - EVT VT = SVN->getValueType(0); - unsigned NumElts = VT.getVectorNumElements(); - - bool Changed = false; - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - if ((0 <= Idx && Idx < (int)NumElts && isUndefElt(N0, Idx)) || - ((int)NumElts < Idx && isUndefElt(N1, Idx - NumElts))) { - Changed = true; - Idx = -1; - } - NewMask.push_back(Idx); - } - if (Changed) - return DAG.getVectorShuffle(VT, SDLoc(SVN), N0, N1, NewMask); + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); return SDValue(); } @@ -16013,10 +16812,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask); } - // Simplify shuffle mask if a referenced element is UNDEF. - if (SDValue V = simplifyShuffleMask(SVN, N0, N1, DAG)) - return V; - if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG)) return InsElt; @@ -16077,11 +16872,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } - // There are various patterns used to build up a vector from smaller vectors, - // subvectors, or elements. Scan chains of these and replace unused insertions - // or components with undef. - if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) - return S; + // Simplify source operands based on shuffle mask. + if (SimplifyDemandedVectorElts(SDValue(N, 0))) + return SDValue(N, 0); // Match shuffles that can be converted to any_vector_extend_in_reg. if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations, LegalTypes)) @@ -16394,7 +17187,9 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR && N1.getOperand(0).getOperand(1) == N2 && N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() == - VT.getVectorNumElements()) { + VT.getVectorNumElements() && + N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() == + VT.getSizeInBits()) { return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0)); } @@ -16405,10 +17200,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) { SDValue CN0 = N0.getOperand(0); SDValue CN1 = N1.getOperand(0); - if (CN0.getValueType().getVectorElementType() == - CN1.getValueType().getVectorElementType() && - CN0.getValueType().getVectorNumElements() == - VT.getVectorNumElements()) { + EVT CN0VT = CN0.getValueType(); + EVT CN1VT = CN1.getValueType(); + if (CN0VT.isVector() && CN1VT.isVector() && + CN0VT.getVectorElementType() == CN1VT.getVectorElementType() && + CN0VT.getVectorNumElements() == VT.getVectorNumElements()) { SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), CN0.getValueType(), CN0, CN1, N2); return DAG.getBitcast(VT, NewINSERT); @@ -16663,14 +17459,14 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, const ConstantFPSDNode *Zero = nullptr; if (TheSelect->getOpcode() == ISD::SELECT_CC) { - CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); + CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); CmpLHS = TheSelect->getOperand(0); Zero = isConstOrConstSplatFP(TheSelect->getOperand(1)); } else { // SELECT or VSELECT SDValue Cmp = TheSelect->getOperand(0); if (Cmp.getOpcode() == ISD::SETCC) { - CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); 
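// The shuffle combines above now funnel into SimplifyDemandedVectorElts.
// A standalone model of how a shuffle mask becomes per-operand demanded
// masks (mask values hypothetical; lanes 0..NumElts-1 pick from N0, the
// rest from N1, -1 is undef):
#include <cstdint>
#include <cstdio>

int main() {
  const int NumElts = 4;
  const int Mask[NumElts] = {0, 2, 6, -1};

  std::uint32_t DemandedN0 = 0, DemandedN1 = 0;
  for (int M : Mask) {
    if (M >= 0 && M < NumElts)
      DemandedN0 |= 1u << M;
    else if (M >= NumElts)
      DemandedN1 |= 1u << (M - NumElts);
  }
  // Undemanded lanes of N0/N1 may be simplified to undef.
  std::printf("N0 mask 0x%x, N1 mask 0x%x\n", DemandedN0, DemandedN1);
}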
+ CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); CmpLHS = Cmp.getOperand(0); Zero = isConstOrConstSplatFP(Cmp.getOperand(1)); } @@ -16888,24 +17684,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, return !SCCC->isNullValue() ? N2 : N3; } - // Check to see if we can simplify the select into an fabs node - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { - // Allow either -0.0 or 0.0 - if (CFP->isZero()) { - // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs - if ((CC == ISD::SETGE || CC == ISD::SETGT) && - N0 == N2 && N3.getOpcode() == ISD::FNEG && - N2 == N3.getOperand(0)) - return DAG.getNode(ISD::FABS, DL, VT, N0); - - // select (setl[te] X, +/-0.0), fneg(X), X -> fabs - if ((CC == ISD::SETLT || CC == ISD::SETLE) && - N0 == N3 && N2.getOpcode() == ISD::FNEG && - N2.getOperand(0) == N3) - return DAG.getNode(ISD::FABS, DL, VT, N3); - } - } - // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 // in it. This is a win when the constant is not otherwise available because @@ -17383,19 +18161,34 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal); if (!Reciprocal) { - // Unfortunately, Est is now NaN if the input was exactly 0.0. - // Select out this case and force the answer to 0.0. + // The estimate is now completely wrong if the input was exactly 0.0 or + // possibly a denormal. Force the answer to 0.0 for those cases. EVT VT = Op.getValueType(); SDLoc DL(Op); - - SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); EVT CCVT = getSetCCResultType(VT); - SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); - AddToWorklist(ZeroCmp.getNode()); - - Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, - ZeroCmp, FPZero, Est); - AddToWorklist(Est.getNode()); + ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; + const Function &F = DAG.getMachineFunction().getFunction(); + Attribute Denorms = F.getFnAttribute("denormal-fp-math"); + if (Denorms.getValueAsString().equals("ieee")) { + // fabs(X) < SmallestNormal ? 0.0 : Est + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); + SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); + SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); + Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); + AddToWorklist(Fabs.getNode()); + AddToWorklist(IsDenorm.getNode()); + AddToWorklist(Est.getNode()); + } else { + // X == 0.0 ? 0.0 : Est + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); + Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); + AddToWorklist(IsZero.getNode()); + AddToWorklist(Est.getNode()); + } } } return Est; @@ -17433,44 +18226,46 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize(); // Check for BaseIndexOffset matching. 
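// The denormal-aware guard above, distilled into plain floats: with IEEE
// denormal semantics the select keys on fabs(x) < smallest-normal rather
// than x == 0.0, since the NR-refined estimate is also garbage for denormal
// inputs. This is a sketch of the emitted dataflow, not the DAG code; the
// rsqrt "estimate" below is a stand-in.
#include <cmath>
#include <cstdio>
#include <limits>

static float rsqrtEst(float X) { return 1.0f / std::sqrt(X); }

static float guardedSqrt(float X, bool DenormsAreIEEE) {
  float Est = X * rsqrtEst(X); // sqrt(x) = x * rsqrt(x); bogus for tiny x
  if (DenormsAreIEEE)          // numeric_limits<float>::min() is the
                               // smallest normal
    return std::fabs(X) < std::numeric_limits<float>::min() ? 0.0f : Est;
  return X == 0.0f ? 0.0f : Est;
}

int main() {
  float Denorm = std::numeric_limits<float>::denorm_min();
  std::printf("%g %g\n", guardedSqrt(0.0f, true), guardedSqrt(Denorm, true));
}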
-  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0->getBasePtr(), DAG);
-  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1->getBasePtr(), DAG);
+  BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
+  BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
   int64_t PtrDiff;
-  if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
-    return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
-
-  // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
-  // able to calculate their relative offset if at least one arises
-  // from an alloca. However, these allocas cannot overlap and we
-  // can infer there is no alias.
-  if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
-    if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
-      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
-      // If the base are the same frame index but the we couldn't find a
-      // constant offset, (indices are different) be conservative.
-      if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
-                     !MFI.isFixedObjectIndex(B->getIndex())))
-        return false;
-    }
-
-  bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
-  bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
-  bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
-  bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
-  bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
-  bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+  if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
+    if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
+      return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
+
+    // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+    // able to calculate their relative offset if at least one arises
+    // from an alloca. However, these allocas cannot overlap and we
+    // can infer there is no alias.
+    if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+      if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+        MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+        // If the bases are the same frame index but we couldn't find a
+        // constant offset (the indices differ), be conservative.
+        if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+                       !MFI.isFixedObjectIndex(B->getIndex())))
+          return false;
+      }
 
-  // If of mismatched base types or checkable indices we can check
-  // they do not alias.
-  if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
-       (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
-      (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
-    return false;
+    bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+    bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+    bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+    bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+    bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+    bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+    // With mismatched base types or checkable indices, we can check that
+    // they do not alias.
+    if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+         (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+        (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+      return false;
+  }
 
-  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
-  // compared to the size and offset of the access, we may be able to prove they
-  // do not alias. This check is conservative for now to catch cases created by
-  // splitting vector types.
+ // If we know required SrcValue1 and SrcValue2 have relatively large + // alignment compared to the size and offset of the access, we may be able + // to prove they do not alias. This check is conservative for now to catch + // cases created by splitting vector types. int64_t SrcValOffset0 = Op0->getSrcValueOffset(); int64_t SrcValOffset1 = Op1->getSrcValueOffset(); unsigned OrigAlignment0 = Op0->getOriginalAlignment(); @@ -17480,8 +18275,8 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; - // There is no overlap between these relatively aligned accesses of similar - // size. Return no alias. + // There is no overlap between these relatively aligned accesses of + // similar size. Return no alias. if ((OffAlign0 + NumBytes0) <= OffAlign1 || (OffAlign1 + NumBytes1) <= OffAlign0) return false; @@ -17644,7 +18439,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { // This holds the base pointer, index, and the offset in bytes from the base // pointer. - BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG); + BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); // We must have a base and an offset. if (!BasePtr.getBase().getNode()) @@ -17670,7 +18465,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { break; // Find the base pointer and offset for this memory node. - BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index, DAG); // Check that the base pointer is the same as the original one. if (!BasePtr.equalBaseIndex(Ptr, DAG)) @@ -17696,7 +18491,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { Index = nullptr; break; } - } // end while + }// end while } // At this point, ChainedStores lists all of the Store nodes diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index d3c94b5f9e6b..e4a9d557d386 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -61,7 +61,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -99,6 +98,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -113,6 +113,11 @@ using namespace llvm; #define DEBUG_TYPE "isel" +// FIXME: Remove this after the feature has proven reliable. +static cl::opt<bool> SinkLocalValues("fast-isel-sink-local-values", + cl::init(true), cl::Hidden, + cl::desc("Sink local values in FastISel")); + STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " "target-independent selector"); STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " @@ -120,9 +125,10 @@ STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); /// Set the current block to which generated machine instructions will be -/// appended, and clear the local CSE map. +/// appended. 
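// The alignment-based disjointness test above, worked through with
// hypothetical numbers: two 8-byte accesses split from one 16-byte-aligned
// object have offsets congruent mod 16, so their in-alignment byte ranges
// cannot overlap.
#include <cstdio>

int main() {
  const long SrcValOffset0 = 0, SrcValOffset1 = 8;
  const long OrigAlignment0 = 16, OrigAlignment1 = 16;
  const long NumBytes0 = 8, NumBytes1 = 8;

  long OffAlign0 = SrcValOffset0 % OrigAlignment0;
  long OffAlign1 = SrcValOffset1 % OrigAlignment1;

  // [OffAlign, OffAlign + NumBytes) ranges are disjoint: no alias.
  bool NoAlias = (OffAlign0 + NumBytes0) <= OffAlign1 ||
                 (OffAlign1 + NumBytes1) <= OffAlign0;
  std::printf("no alias: %s\n", NoAlias ? "true" : "false"); // true
}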
void FastISel::startNewBlock() { - LocalValueMap.clear(); + assert(LocalValueMap.empty() && + "local values should be cleared after finishing a BB"); // Instructions are appended to FuncInfo.MBB. If the basic block already // contains labels or copies, use the last instruction as the last local @@ -133,6 +139,9 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } +/// Flush the local CSE map and sink anything we can. +void FastISel::finishBasicBlock() { flushLocalValueMap(); } + bool FastISel::lowerArguments() { if (!FuncInfo.CanLowerReturn) // Fallback to SDISel argument lowering code to deal with sret pointer @@ -153,11 +162,168 @@ bool FastISel::lowerArguments() { return true; } +/// Return the defined register if this instruction defines exactly one +/// virtual register and uses no other virtual registers. Otherwise return 0. +static unsigned findSinkableLocalRegDef(MachineInstr &MI) { + unsigned RegDef = 0; + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef()) { + if (RegDef) + return 0; + RegDef = MO.getReg(); + } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + // This is another use of a vreg. Don't try to sink it. + return 0; + } + } + return RegDef; +} + void FastISel::flushLocalValueMap() { + // Try to sink local values down to their first use so that we can give them a + // better debug location. This has the side effect of shrinking local value + // live ranges, which helps out fast regalloc. + if (SinkLocalValues && LastLocalValue != EmitStartPt) { + // Sink local value materialization instructions between EmitStartPt and + // LastLocalValue. Visit them bottom-up, starting from LastLocalValue, to + // avoid inserting into the range that we're iterating over. + MachineBasicBlock::reverse_iterator RE = + EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt) + : FuncInfo.MBB->rend(); + MachineBasicBlock::reverse_iterator RI(LastLocalValue); + + InstOrderMap OrderMap; + for (; RI != RE;) { + MachineInstr &LocalMI = *RI; + ++RI; + bool Store = true; + if (!LocalMI.isSafeToMove(nullptr, Store)) + continue; + unsigned DefReg = findSinkableLocalRegDef(LocalMI); + if (DefReg == 0) + continue; + + sinkLocalValueMaterialization(LocalMI, DefReg, OrderMap); + } + } + LocalValueMap.clear(); LastLocalValue = EmitStartPt; recomputeInsertPt(); SavedInsertPt = FuncInfo.InsertPt; + LastFlushPoint = FuncInfo.InsertPt; +} + +static bool isRegUsedByPhiNodes(unsigned DefReg, + FunctionLoweringInfo &FuncInfo) { + for (auto &P : FuncInfo.PHINodesToUpdate) + if (P.second == DefReg) + return true; + return false; +} + +/// Build a map of instruction orders. Return the first terminator and its +/// order. Consider EH_LABEL instructions to be terminators as well, since local +/// values for phis after invokes must be materialized before the call. +void FastISel::InstOrderMap::initialize( + MachineBasicBlock *MBB, MachineBasicBlock::iterator LastFlushPoint) { + unsigned Order = 0; + for (MachineInstr &I : *MBB) { + if (!FirstTerminator && + (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) { + FirstTerminator = &I; + FirstTerminatorOrder = Order; + } + Orders[&I] = Order++; + + // We don't need to order instructions past the last flush point. 
+    if (I.getIterator() == LastFlushPoint)
+      break;
+  }
+}
+
+void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI,
+                                             unsigned DefReg,
+                                             InstOrderMap &OrderMap) {
+  // If this register is used by a register fixup, MRI will not contain all
+  // the uses until after register fixups, so don't attempt to sink or DCE
+  // this instruction. Register fixups typically come from no-op cast
+  // instructions, which replace the cast instruction vreg with the local
+  // value vreg.
+  if (FuncInfo.RegsWithFixups.count(DefReg))
+    return;
+
+  // We can DCE this instruction if there are no uses and it wasn't
+  // materialized for a successor PHI node.
+  bool UsedByPHI = isRegUsedByPhiNodes(DefReg, FuncInfo);
+  if (!UsedByPHI && MRI.use_nodbg_empty(DefReg)) {
+    if (EmitStartPt == &LocalMI)
+      EmitStartPt = EmitStartPt->getPrevNode();
+    LLVM_DEBUG(dbgs() << "removing dead local value materialization "
+                      << LocalMI);
+    OrderMap.Orders.erase(&LocalMI);
+    LocalMI.eraseFromParent();
+    return;
+  }
+
+  // Number the instructions if we haven't yet so we can efficiently find the
+  // earliest use.
+  if (OrderMap.Orders.empty())
+    OrderMap.initialize(FuncInfo.MBB, LastFlushPoint);
+
+  // Find the first user in the BB.
+  MachineInstr *FirstUser = nullptr;
+  unsigned FirstOrder = std::numeric_limits<unsigned>::max();
+  for (MachineInstr &UseInst : MRI.use_nodbg_instructions(DefReg)) {
+    auto I = OrderMap.Orders.find(&UseInst);
+    assert(I != OrderMap.Orders.end() &&
+           "local value used by instruction outside local region");
+    unsigned UseOrder = I->second;
+    if (UseOrder < FirstOrder) {
+      FirstOrder = UseOrder;
+      FirstUser = &UseInst;
+    }
+  }
+
+  // The insertion point will be the first terminator or the first user,
+  // whichever came first. If there was no terminator, this must be a
+  // fallthrough block and the insertion point is the end of the block.
+  MachineBasicBlock::instr_iterator SinkPos;
+  if (UsedByPHI && OrderMap.FirstTerminatorOrder < FirstOrder) {
+    FirstOrder = OrderMap.FirstTerminatorOrder;
+    SinkPos = OrderMap.FirstTerminator->getIterator();
+  } else if (FirstUser) {
+    SinkPos = FirstUser->getIterator();
+  } else {
+    assert(UsedByPHI && "must be users if not used by a phi");
+    SinkPos = FuncInfo.MBB->instr_end();
+  }
+
+  // Collect all DBG_VALUEs before the new insertion position so that we can
+  // sink them.
+  SmallVector<MachineInstr *, 1> DbgValues;
+  for (MachineInstr &DbgVal : MRI.use_instructions(DefReg)) {
+    if (!DbgVal.isDebugValue())
+      continue;
+    unsigned UseOrder = OrderMap.Orders[&DbgVal];
+    if (UseOrder < FirstOrder)
+      DbgValues.push_back(&DbgVal);
+  }
+
+  // Sink LocalMI before SinkPos and assign it the same DebugLoc.
+  LLVM_DEBUG(dbgs() << "sinking local value to first use " << LocalMI);
+  FuncInfo.MBB->remove(&LocalMI);
+  FuncInfo.MBB->insert(SinkPos, &LocalMI);
+  if (SinkPos != FuncInfo.MBB->end())
+    LocalMI.setDebugLoc(SinkPos->getDebugLoc());
+
+  // Sink any debug values that we've collected.
+  for (MachineInstr *DI : DbgValues) {
+    FuncInfo.MBB->remove(DI);
+    FuncInfo.MBB->insert(SinkPos, DI);
+  }
 }
 
 bool FastISel::hasTrivialKill(const Value *V) {
@@ -328,8 +494,10 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) {
     AssignedReg = Reg;
   else if (Reg != AssignedReg) {
     // Arrange for uses of AssignedReg to be replaced by uses of Reg.
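// The sink-position choice above, reduced to plain orders (hypothetical
// values, not MachineInstr iterators): a PHI-used value must stay above the
// first terminator, because that is where PHI copies are emitted; otherwise
// the first non-debug user wins.
#include <cstdio>

int main() {
  const unsigned FirstTerminatorOrder = 7; // from InstOrderMap::initialize
  const unsigned FirstUserOrder = 9;       // first non-debug use of DefReg
  const bool UsedByPHI = true;             // value feeds a successor PHI

  unsigned SinkOrder = (UsedByPHI && FirstTerminatorOrder < FirstUserOrder)
                           ? FirstTerminatorOrder
                           : FirstUserOrder;
  std::printf("sink the materialization before order %u\n", SinkOrder);
}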
-    for (unsigned i = 0; i < NumRegs; i++)
+    for (unsigned i = 0; i < NumRegs; i++) {
       FuncInfo.RegFixups[AssignedReg + i] = Reg + i;
+      FuncInfo.RegsWithFixups.insert(Reg + i);
+    }
 
     AssignedReg = Reg;
   }
@@ -681,7 +849,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
   return true;
 }
 
-/// \brief Lower an argument list according to the target calling convention.
+/// Lower an argument list according to the target calling convention.
 ///
 /// This is a helper for lowering intrinsics that follow a target calling
 /// convention or require stack pointer adjustment. Only a subset of the
@@ -702,7 +870,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
     ArgListEntry Entry;
     Entry.Val = V;
     Entry.Ty = V->getType();
-    Entry.setAttributes(&CS, ArgIdx);
+    Entry.setAttributes(&CS, ArgI);
     Args.push_back(Entry);
   }
 
@@ -874,10 +1042,31 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
                          TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
   for (auto &MO : Ops)
     MIB.add(MO);
+  // Insert the Patchable Event Call instruction, which gets lowered properly.
   return true;
 }
 
+bool FastISel::selectXRayTypedEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
+                                          /*IsDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+
+  // Insert the Patchable Typed Event Call instruction, which gets lowered
+  // properly.
+  return true;
+}
 
 /// Returns an AttributeList representing the attributes applied to the return
 /// value of the given call.
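// The setAttributes change above (ArgIdx -> ArgI) fixes a classic loop-index
// bug: attributes were looked up with the fixed start index, so every
// argument received the first argument's attributes. Distilled into a
// standalone sketch with hypothetical attribute names:
#include <cstdio>

int main() {
  const char *Attrs[] = {"sret", "none", "byval"};
  const unsigned ArgIdx = 0, NumArgs = 3;

  for (unsigned ArgI = ArgIdx; ArgI != ArgIdx + NumArgs; ++ArgI) {
    // Buggy form: Attrs[ArgIdx] reused the fixed start index every time.
    // Fixed form: index with the loop variable, as the patch does.
    std::printf("arg %u gets attribute '%s'\n", ArgI, Attrs[ArgI]);
  }
}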
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, + *Op, DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } return true; } @@ -1242,7 +1422,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } return true; } @@ -1256,7 +1436,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { updateValueMap(II, ResultReg); return true; } - case Intrinsic::invariant_group_barrier: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: case Intrinsic::expect: { unsigned ResultReg = getRegForValue(II->getArgOperand(0)); if (!ResultReg) @@ -1272,6 +1453,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::xray_customevent: return selectXRayCustomEvent(II); + case Intrinsic::xray_typedevent: + return selectXRayTypedEvent(II); } return fastLowerIntrinsicCall(II); @@ -2051,11 +2234,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. - for (BasicBlock::const_iterator I = SuccBB->begin(); - const auto *PN = dyn_cast<PHINode>(I); ++I) { - + for (const PHINode &PN : SuccBB->phis()) { // Ignore dead PHIs. - if (PN->use_empty()) + if (PN.use_empty()) continue; // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. - EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true); + EVT VT = TLI.getValueType(DL, PN.getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { // Handle integer promotions, though, because they're common and easy. if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { @@ -2073,11 +2254,11 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { } } - const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); // Set the DebugLoc for the copy. Prefer the location of the operand // if there is one; use the location of the PHI otherwise.
- DbgLoc = PN->getDebugLoc(); + DbgLoc = PN.getDebugLoc(); if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) DbgLoc = Inst->getDebugLoc(); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index c7cdb49203b1..42c7181dac41 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/CodeGen/WasmEHFuncInfo.h" #include "llvm/CodeGen/WinEHFuncInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -118,6 +119,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } } + if (Personality == EHPersonality::Wasm_CXX) { + WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + calculateWasmEHInfo(&fn, EHInfo); + } // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines @@ -226,9 +231,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, const Instruction *PadInst = BB.getFirstNonPHI(); // If this is a non-landingpad EH pad, mark this function as using // funclets. - // FIXME: SEH catchpads do not create funclets, so we could avoid setting - // this in such cases in order to improve frame layout. + // FIXME: SEH catchpads do not create EH scope/funclets, so we could avoid + // setting this in such cases in order to improve frame layout. if (!isa<LandingPadInst>(PadInst)) { + MF->setHasEHScopes(true); MF->setHasEHFunclets(true); MF->getFrameInfo().setHasOpaqueSPAdjustment(true); } @@ -257,20 +263,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Create Machine PHI nodes for LLVM PHI nodes, lowering them as // appropriate. - for (BasicBlock::const_iterator I = BB.begin(); - const PHINode *PN = dyn_cast<PHINode>(I); ++I) { - if (PN->use_empty()) continue; + for (const PHINode &PN : BB.phis()) { + if (PN.use_empty()) + continue; // Skip empty types - if (PN->getType()->isEmptyTy()) + if (PN.getType()->isEmptyTy()) continue; - DebugLoc DL = PN->getDebugLoc(); - unsigned PHIReg = ValueMap[PN]; + DebugLoc DL = PN.getDebugLoc(); + unsigned PHIReg = ValueMap[&PN]; assert(PHIReg && "PHI node does not have an assigned virtual register!"); SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs); + ComputeValueVTs(*TLI, MF->getDataLayout(), PN.getType(), ValueVTs); for (EVT VT : ValueVTs) { unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); @@ -281,28 +287,46 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } - if (!isFuncletEHPersonality(Personality)) - return; - - WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (isFuncletEHPersonality(Personality)) { + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); - // Map all BB references in the WinEH data to MBBs. - for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { - for (WinEHHandlerType &H : TBME.HandlerArray) { - if (H.Handler) - H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; + // Map all BB references in the WinEH data to MBBs. 
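// (These tables were computed over IR basic blocks before instruction
// selection; once machine basic blocks exist, every BasicBlock reference
// has to be rewritten to the corresponding MachineBasicBlock so that EH
// table emission refers to machine-level blocks and labels.)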
+ for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (H.Handler) + H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; + } + } + for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) + if (UME.Cleanup) + UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; + for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { + const auto *BB = UME.Handler.get<const BasicBlock *>(); + UME.Handler = MBBMap[BB]; + } + for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { + const auto *BB = CME.Handler.get<const BasicBlock *>(); + CME.Handler = MBBMap[BB]; } } - for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) - if (UME.Cleanup) - UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; - for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { - const BasicBlock *BB = UME.Handler.get<const BasicBlock *>(); - UME.Handler = MBBMap[BB]; - } - for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { - const BasicBlock *BB = CME.Handler.get<const BasicBlock *>(); - CME.Handler = MBBMap[BB]; + + else if (Personality == EHPersonality::Wasm_CXX) { + WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + // Map all BB references in the Wasm EH data to MBBs. + DenseMap<BBOrMBB, BBOrMBB> NewMap; + for (auto &KV : EHInfo.EHPadUnwindMap) { + const auto *Src = KV.first.get<const BasicBlock *>(); + const auto *Dst = KV.second.get<const BasicBlock *>(); + NewMap[MBBMap[Src]] = MBBMap[Dst]; + } + EHInfo.EHPadUnwindMap = std::move(NewMap); + NewMap.clear(); + for (auto &KV : EHInfo.ThrowUnwindMap) { + const auto *Src = KV.first.get<const BasicBlock *>(); + const auto *Dst = KV.second.get<const BasicBlock *>(); + NewMap[MBBMap[Src]] = MBBMap[Dst]; + } + EHInfo.ThrowUnwindMap = std::move(NewMap); } } @@ -312,12 +336,14 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, void FunctionLoweringInfo::clear() { MBBMap.clear(); ValueMap.clear(); + VirtReg2Value.clear(); StaticAllocaMap.clear(); LiveOutRegInfo.clear(); VisitedBBs.clear(); ArgDbgValues.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); + RegsWithFixups.clear(); StatepointStackSlots.clear(); StatepointSpillMaps.clear(); PreferredExtendType.clear(); @@ -483,7 +509,7 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { auto I = ByValArgFrameIndexMap.find(A); if (I != ByValArgFrameIndexMap.end()) return I->second; - DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); + LLVM_DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); return INT_MAX; } @@ -547,3 +573,13 @@ FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const } return std::make_pair(It->second, false); } + +const Value * +FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { + if (VirtReg2Value.empty()) { + for (auto &P : ValueMap) { + VirtReg2Value[P.second] = P.first; + } + } + return VirtReg2Value[Vreg]; +} diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index cc9b41b4b487..d6171f3177d7 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -394,11 +394,26 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { MIB.addFPImm(F->getConstantFPValue()); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + unsigned VReg = R->getReg(); + MVT OpVT = Op.getSimpleValueType(); + const TargetRegisterClass *OpRC = + TLI->isTypeLegal(OpVT) ?
TLI->getRegClassFor(OpVT) : nullptr; + const TargetRegisterClass *IIRC = + II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) + : nullptr; + + if (OpRC && IIRC && OpRC != IIRC && + TargetRegisterInfo::isVirtualRegister(VReg)) { + unsigned NewVReg = MRI->createVirtualRegister(IIRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } // Turn additional physreg operands into implicit uses on non-variadic // instructions. This is used by call and return instructions passing // arguments in registers. bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); - MIB.addReg(R->getReg(), getImplRegState(Imp)); + MIB.addReg(VReg, getImplRegState(Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { MIB.addRegMask(RM->getRegMask()); } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { @@ -682,11 +697,15 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. - return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .addFrameIndex(SD->getFrameIx()) - .addImm(0) - .addMetadata(Var) - .addMetadata(Expr); + auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SD->getFrameIx()); + if (SD->isIndirect()) + // Push [fi + 0] onto the DIExpression stack. + FrameMI.addImm(0); + else + // Push fi onto the DIExpression stack. + FrameMI.addReg(0); + return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -705,6 +724,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, else AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); + } else if (SD->getKind() == SDDbgValue::VREG) { + MIB.addReg(SD->getVReg(), RegState::Debug); } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { @@ -736,6 +757,20 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, return &*MIB; } +MachineInstr * +InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { + MDNode *Label = SD->getLabel(); + DebugLoc DL = SD->getDebugLoc(); + assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_LABEL); + MachineInstrBuilder MIB = BuildMI(*MF, DL, II); + MIB.addMetadata(Label); + + return &*MIB; +} + /// EmitMachineNode - Generate machine code for a target-specific node and /// needed dependencies. /// @@ -807,9 +842,34 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Add result register values for things that are defined by this // instruction. 
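// (The addition below mirrors each fast-math flag from the SDNode onto the
// new MachineInstr's MIFlags, so post-ISel passes running on MachineInstrs
// can still see nnan/ninf/nsz and friends.)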
- if (NumResults) + if (NumResults) { CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); + // Transfer any IR flags from the SDNode to the MachineInstr + MachineInstr *MI = MIB.getInstr(); + const SDNodeFlags Flags = Node->getFlags(); + if (Flags.hasNoSignedZeros()) + MI->setFlag(MachineInstr::MIFlag::FmNsz); + + if (Flags.hasAllowReciprocal()) + MI->setFlag(MachineInstr::MIFlag::FmArcp); + + if (Flags.hasNoNaNs()) + MI->setFlag(MachineInstr::MIFlag::FmNoNans); + + if (Flags.hasNoInfs()) + MI->setFlag(MachineInstr::MIFlag::FmNoInfs); + + if (Flags.hasAllowContract()) + MI->setFlag(MachineInstr::MIFlag::FmContract); + + if (Flags.hasApproximateFuncs()) + MI->setFlag(MachineInstr::MIFlag::FmAfn); + + if (Flags.hasAllowReassociation()) + MI->setFlag(MachineInstr::MIFlag::FmReassoc); + } + // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. bool HasOptPRefs = NumDefs > NumResults; diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 8a8a1bbd18f7..701b6368690b 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -113,6 +113,9 @@ public: MachineInstr *EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap); + /// Generate machine instruction for a dbg_label node. + MachineInstr *EmitDbgLabel(SDDbgLabel *SD); + /// EmitNode - Generate machine code for a node and needed dependencies. /// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bb1dc17b7a1b..2b7ba1ffb309 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -22,7 +22,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -41,6 +40,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetMachine.h" @@ -87,11 +87,11 @@ class SelectionDAGLegalize { const TargetLowering &TLI; SelectionDAG &DAG; - /// \brief The set of nodes which have already been legalized. We hold a + /// The set of nodes which have already been legalized. We hold a /// reference to it in order to update as necessary on node deletion. SmallPtrSetImpl<SDNode *> &LegalizedNodes; - /// \brief A set of all the nodes updated during legalization. + /// A set of all the nodes updated during legalization. SmallSetVector<SDNode *, 16> *UpdatedNodes; EVT getSetCCResultType(EVT VT) const { @@ -107,7 +107,7 @@ public: : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG), LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} - /// \brief Legalizes the given operation. + /// Legalizes the given operation. 
void LegalizeOp(SDNode *Node); private: @@ -167,7 +167,7 @@ private: SDValue NewIntValue) const; SDValue ExpandFCOPYSIGN(SDNode *Node) const; SDValue ExpandFABS(SDNode *Node) const; - SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, + SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl); SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, const SDLoc &dl); @@ -200,8 +200,8 @@ public: } void ReplaceNode(SDNode *Old, SDNode *New) { - DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); - dbgs() << " with: "; New->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); assert(Old->getNumValues() == New->getNumValues() && "Replacing one node with another that produces a different number " @@ -213,8 +213,8 @@ public: } void ReplaceNode(SDValue Old, SDValue New) { - DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); - dbgs() << " with: "; New->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); if (UpdatedNodes) @@ -223,13 +223,12 @@ public: } void ReplaceNode(SDNode *Old, const SDValue *New) { - DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); DAG.ReplaceAllUsesWith(Old, New); for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { - DEBUG(dbgs() << (i == 0 ? " with: " - : " and: "); - New[i]->dump(&DAG)); + LLVM_DEBUG(dbgs() << (i == 0 ? " with: " : " and: "); + New[i]->dump(&DAG)); if (UpdatedNodes) UpdatedNodes->insert(New[i].getNode()); } @@ -408,7 +407,7 @@ SDValue SelectionDAGLegalize::ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, } SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { - DEBUG(dbgs() << "Optimizing float store operations\n"); + LLVM_DEBUG(dbgs() << "Optimizing float store operations\n"); // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' // FIXME: We shouldn't do this for TargetConstantFP's. // FIXME: move this to the DAG Combiner! 
Note that we can't regress due @@ -477,7 +476,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { AAMDNodes AAInfo = ST->getAAInfo(); if (!ST->isTruncatingStore()) { - DEBUG(dbgs() << "Legalizing store operation\n"); + LLVM_DEBUG(dbgs() << "Legalizing store operation\n"); if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { ReplaceNode(ST, OptStore); return; @@ -495,15 +494,15 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { unsigned Align = ST->getAlignment(); const DataLayout &DL = DAG.getDataLayout(); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) { - DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); + LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } else - DEBUG(dbgs() << "Legal store\n"); + LLVM_DEBUG(dbgs() << "Legal store\n"); break; } case TargetLowering::Custom: { - DEBUG(dbgs() << "Trying custom lowering\n"); + LLVM_DEBUG(dbgs() << "Trying custom lowering\n"); SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Res && Res != SDValue(Node, 0)) ReplaceNode(SDValue(Node, 0), Res); @@ -524,7 +523,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { return; } - DEBUG(dbgs() << "Legalizing truncating store operations\n"); + LLVM_DEBUG(dbgs() << "Legalizing truncating store operations\n"); SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -656,7 +655,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { - DEBUG(dbgs() << "Legalizing non-extending load operation\n"); + LLVM_DEBUG(dbgs() << "Legalizing non-extending load operation\n"); MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); @@ -706,7 +705,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { return; } - DEBUG(dbgs() << "Legalizing extending load operation\n"); + LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); unsigned Alignment = LD->getAlignment(); @@ -947,39 +946,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { } } -static TargetLowering::LegalizeAction -getStrictFPOpcodeAction(const TargetLowering &TLI, unsigned Opcode, EVT VT) { - unsigned EqOpc; - switch (Opcode) { - default: llvm_unreachable("Unexpected FP pseudo-opcode"); - case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; - case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; - case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; - case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; - case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; - case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; - case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; - case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; - case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; - case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; - case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; - case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; - case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; - } - - auto Action = TLI.getOperationAction(EqOpc, VT); - - // We don't currently handle Custom or Promote for strict FP pseudo-ops. - // For now, we just expand for those cases. 
- if (Action != TargetLowering::Legal) - Action = TargetLowering::Expand; - - return Action; -} - /// Return a legal replacement for the given operation, with all legal operands. void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { - DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); // Allow illegal target nodes and illegal registers. if (Node->getOpcode() == ISD::TargetConstant || @@ -1043,8 +1012,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::SETCC: case ISD::BR_CC: { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : - Node->getOpcode() == ISD::SETCC ? 2 : - Node->getOpcode() == ISD::SETCCE ? 3 : 1; + Node->getOpcode() == ISD::SETCC ? 2 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = @@ -1122,6 +1090,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: case ISD::STRICT_FSQRT: case ISD::STRICT_FMA: case ISD::STRICT_FPOW: @@ -1139,8 +1111,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT // is also legal, but if ISD::FSQRT requires expansion then so does // ISD::STRICT_FSQRT. - Action = getStrictFPOpcodeAction(TLI, Node->getOpcode(), - Node->getValueType(0)); + Action = TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)); break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { @@ -1202,10 +1174,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } switch (Action) { case TargetLowering::Legal: - DEBUG(dbgs() << "Legal node: nothing to do\n"); + LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); return; case TargetLowering::Custom: - DEBUG(dbgs() << "Trying custom legalization\n"); + LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); // FIXME: The handling for custom lowering with multiple results is // a complete mess. if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) { @@ -1213,7 +1185,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; if (Node->getNumValues() == 1) { - DEBUG(dbgs() << "Successfully custom legalized node\n"); + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); // We can just directly replace this node with the lowered value. 
ReplaceNode(SDValue(Node, 0), Res); return; @@ -1222,11 +1194,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SmallVector<SDValue, 8> ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) ResultVals.push_back(Res.getValue(i)); - DEBUG(dbgs() << "Successfully custom legalized node\n"); + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); ReplaceNode(Node, ResultVals.data()); return; } - DEBUG(dbgs() << "Could not custom legalize node\n"); + LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; case TargetLowering::Expand: if (ExpandNode(Node)) @@ -1623,6 +1595,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; + bool NeedSwap = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1630,23 +1603,37 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, break; case TargetLowering::Expand: { ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); - if (TLI.isCondCodeLegal(InvCC, OpVT)) { + if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { std::swap(LHS, RHS); CC = DAG.getCondCode(InvCC); return true; } + // Swapping operands didn't work. Try inverting the condition. + InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); + if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { + // If inverting the condition is not enough, try swapping operands + // on top of it. + InvCC = ISD::getSetCCSwappedOperands(InvCC); + NeedSwap = true; + } + if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + if (NeedSwap) + std::swap(LHS, RHS); + return true; + } + ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; unsigned Opc = 0; switch (CCCode) { default: llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETO: - assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) - == TargetLowering::Legal + assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETO is expanded, SETOEQ must be legal!"); CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; case ISD::SETUO: - assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) - == TargetLowering::Legal + assert(TLI.isCondCodeLegal(ISD::SETUNE, OpVT) && "If SETUO is expanded, SETUNE must be legal!"); CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; case ISD::SETOEQ: @@ -1676,20 +1663,10 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, case ISD::SETGT: case ISD::SETGE: case ISD::SETLT: - // We only support using the inverted operation, which is computed above - // and not a different manner of supporting expanding these cases. - llvm_unreachable("Don't know how to expand this condition!"); case ISD::SETNE: case ISD::SETEQ: - // Try inverting the result of the inverse condition. - InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; - if (TLI.isCondCodeLegal(InvCC, OpVT)) { - CC = DAG.getCondCode(InvCC); - NeedInvert = true; - return true; - } - // If inverting the condition didn't work then we have no means to expand - // the condition. + // If all combinations of inverting the condition and swapping operands + // didn't work then we have no means to expand the condition. 
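// Standalone sanity check (plain C++, illustrative only) of the rewrites
// tried above. Both identities hold for any total order, which is why the
// integer case can always fall back to them; IEEE unordered comparisons are
// what forces the explicit two-compare expansions for the FP cases.
#include <cassert>
static void checkSetCCRewrites() {
  for (int A = -2; A <= 2; ++A)
    for (int B = -2; B <= 2; ++B) {
      assert((A > B) == (B < A));   // swap operands: SETGT -> SETLT
      assert((A > B) == !(A <= B)); // invert condition: SETGT -> !SETLE
      assert((A > B) == !(B >= A)); // invert + swap: SETGT -> !SETGE swapped
    }
}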
llvm_unreachable("Don't know how to expand this condition!"); } @@ -1996,14 +1973,15 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); Entry.Node = Op; Entry.Ty = ArgTy; - Entry.IsSExt = isSigned; - Entry.IsZExt = !isSigned; + Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); + Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); Args.push_back(Entry); } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); - Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); // By default, the input chain to this libcall is the entry node of the // function. If the libcall is going to be emitted as a tail call then @@ -2022,24 +2000,25 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, InChain = TCChain; TargetLowering::CallLoweringInfo CLI(DAG); + bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, isSigned); CLI.setDebugLoc(SDLoc(Node)) .setChain(InChain) .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) .setTailCall(isTailCall) - .setSExtResult(isSigned) - .setZExtResult(!isSigned) + .setSExtResult(signExtend) + .setZExtResult(!signExtend) .setIsPostTypeLegalization(true); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) { - DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); + LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); } - DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); + LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); return CallInfo.first; } @@ -2325,10 +2304,10 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT, const SDLoc &dl) { // TODO: Should any fast-math-flags be set for the created nodes? - DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); + LLVM_DEBUG(dbgs() << "Legalizing INT_TO_FP\n"); if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { - DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " - "expansion\n"); + LLVM_DEBUG(dbgs() << "32-bit [signed|unsigned] integer to float/double " + "expansion\n"); // Get the stack frame index of a 8 byte buffer. SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); @@ -2393,7 +2372,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // and in all alternate rounding modes. // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { - DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); + LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f64\n"); SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); SDValue TwoP84PlusTwoP52 = @@ -2416,7 +2395,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, // TODO: Generalize this for use with other types. if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { - DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); + LLVM_DEBUG(dbgs() << "Converting unsigned i64 to f32\n"); // For unsigned conversions, convert them to signed conversions using the // algorithm from the x86_64 __floatundidf in compiler_rt. 
if (!isSigned) { @@ -2851,7 +2830,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { - DEBUG(dbgs() << "Trying to expand node\n"); + LLVM_DEBUG(dbgs() << "Trying to expand node\n"); SmallVector<SDValue, 8> Results; SDLoc dl(Node); SDValue Tmp1, Tmp2, Tmp3, Tmp4; @@ -2965,12 +2944,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::ZERO_EXTEND: LHS = DAG.getNode(ISD::AssertZext, dl, OuterType, Res, DAG.getValueType(AtomicType)); - RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); ExtRes = LHS; break; case ISD::ANY_EXTEND: LHS = DAG.getZeroExtendInReg(Res, dl, AtomicType); - RHS = DAG.getNode(ISD::ZERO_EXTEND, dl, OuterType, Node->getOperand(2)); + RHS = DAG.getZeroExtendInReg(Node->getOperand(2), dl, AtomicType); break; default: llvm_unreachable("Invalid atomic op extension"); @@ -3309,7 +3288,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } break; case ISD::FP_TO_FP16: - DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); + LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { SDValue Op = Node->getOperand(0); MVT SVT = Op.getSimpleValueType(); @@ -3523,15 +3502,25 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::USUBO: { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); - SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? - ISD::ADD : ISD::SUB, dl, LHS.getValueType(), - LHS, RHS); + bool IsAdd = Node->getOpcode() == ISD::UADDO; + // If ADD/SUBCARRY is legal, use that instead. + unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY; + if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) { + SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1)); + SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(), + { LHS, RHS, CarryIn }); + Results.push_back(SDValue(NodeCarry.getNode(), 0)); + Results.push_back(SDValue(NodeCarry.getNode(), 1)); + break; + } + + SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl, + LHS.getValueType(), LHS, RHS); Results.push_back(Sum); EVT ResultType = Node->getValueType(1); EVT SetCCType = getSetCCResultType(Node->getValueType(0)); - ISD::CondCode CC - = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); @@ -3682,8 +3671,17 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, - DAG.getConstant(EntrySize, dl, Index.getValueType())); + // For power-of-two jumptable entry sizes convert multiplication to a shift. + // This transformation needs to be done here since otherwise the MIPS + // backend will end up emitting a three instruction multiply sequence + // instead of a single shift and MSP430 will call a runtime function. 
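// (The guarded change below relies on the usual strength reduction
// x * 2^k == x << k, which is exact for fixed-width integers since both
// sides are computed modulo 2^n; isPowerOf2_32/Log2_32 recover k from
// EntrySize.)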
+ if (llvm::isPowerOf2_32(EntrySize)) + Index = DAG.getNode( + ISD::SHL, dl, Index.getValueType(), Index, + DAG.getConstant(llvm::Log2_32(EntrySize), dl, Index.getValueType())); + else + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EntrySize, dl, Index.getValueType())); SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, Table); @@ -3699,7 +3697,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, TLI.getPICJumpTableRelocBase(Table, DAG)); } - Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + + Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG); Results.push_back(Tmp1); break; } @@ -3718,7 +3717,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (Tmp2.isUndef() || (Tmp2.getOpcode() == ISD::AND && isa<ConstantSDNode>(Tmp2.getOperand(1)) && - dyn_cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1)) + cast<ConstantSDNode>(Tmp2.getOperand(1))->getZExtValue() == 1)) Tmp3 = Tmp2; else Tmp3 = DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, @@ -3757,7 +3756,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); int TrueValue; - switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { + switch (TLI.getBooleanContents(Tmp1.getValueType())) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; @@ -3782,7 +3781,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue CC = Node->getOperand(4); ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); - if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { + if (TLI.isCondCodeLegalOrCustom(CCOp, Tmp1.getSimpleValueType())) { // If the condition code is legal, then we need to expand this // node using SETCC and SELECT. EVT CmpVT = Tmp1.getValueType(); @@ -3803,7 +3802,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // version (or vice versa). ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType().isInteger()); - if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { + if (TLI.isCondCodeLegalOrCustom(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false Legalized = true; Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); @@ -3811,7 +3810,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If The inverse is not legal, then try to swap the arguments using // the inverse condition code. ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); - if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { + if (TLI.isCondCodeLegalOrCustom(SwapInvCC, Tmp1.getSimpleValueType())) { // The swapped inverse condition is legal, so swap true and false, // lhs and rhs. Legalized = true; @@ -3904,6 +3903,46 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { ReplaceNode(SDValue(Node, 0), Result); break; } + case ISD::ROTL: + case ISD::ROTR: { + bool IsLeft = Node->getOpcode() == ISD::ROTL; + SDValue Op0 = Node->getOperand(0), Op1 = Node->getOperand(1); + EVT ResVT = Node->getValueType(0); + EVT OpVT = Op0.getValueType(); + assert(OpVT == ResVT && + "The result and the operand types of rotate should match"); + EVT ShVT = Op1.getValueType(); + SDValue Width = DAG.getConstant(OpVT.getScalarSizeInBits(), dl, ShVT); + + // If a rotate in the other direction is legal, use it. + unsigned RevRot = IsLeft ? 
ISD::ROTR : ISD::ROTL; + if (TLI.isOperationLegal(RevRot, ResVT)) { + SDValue Sub = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); + Results.push_back(DAG.getNode(RevRot, dl, ResVT, Op0, Sub)); + break; + } + + // Otherwise, + // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) + // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) + // + assert(isPowerOf2_32(OpVT.getScalarSizeInBits()) && + "Expecting the type bitwidth to be a power of 2"); + unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; + unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; + SDValue Width1 = DAG.getNode(ISD::SUB, dl, ShVT, + Width, DAG.getConstant(1, dl, ShVT)); + SDValue NegOp1 = DAG.getNode(ISD::SUB, dl, ShVT, Width, Op1); + SDValue And0 = DAG.getNode(ISD::AND, dl, ShVT, Op1, Width1); + SDValue And1 = DAG.getNode(ISD::AND, dl, ShVT, NegOp1, Width1); + + SDValue Or = DAG.getNode(ISD::OR, dl, ResVT, + DAG.getNode(ShOpc, dl, ResVT, Op0, And0), + DAG.getNode(HsOpc, dl, ResVT, Op0, And1)); + Results.push_back(Or); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3919,19 +3958,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Replace the original node with the legalized result. if (Results.empty()) { - DEBUG(dbgs() << "Cannot expand node\n"); + LLVM_DEBUG(dbgs() << "Cannot expand node\n"); return false; } - DEBUG(dbgs() << "Succesfully expanded node\n"); + LLVM_DEBUG(dbgs() << "Successfully expanded node\n"); ReplaceNode(Node, Results.data()); return true; } void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { - DEBUG(dbgs() << "Trying to convert node to libcall\n"); + LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n"); SmallVector<SDValue, 8> Results; SDLoc dl(Node); + // FIXME: Check flags on the node to see if we can use a finite call.
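// Standalone check (plain C++, illustrative only) of the identity the
// ROTL/ROTR expansion above relies on for a power-of-two bit width w:
//   rotl(x, c) == (x << (c & (w-1))) | (x >> ((w - c) & (w-1)))
#include <cassert>
#include <cstdint>
static void checkRotlExpansion() {
  const unsigned W = 32;
  const uint32_t X = 0xDEADBEEFu;
  for (unsigned C = 0; C < 2 * W; ++C) {
    uint32_t Expanded = (X << (C & (W - 1))) | (X >> ((W - C) & (W - 1)));
    unsigned R = C % W; // reference rotate amount
    uint32_t Reference = R ? (X << R) | (X >> (W - R)) : X;
    assert(Expanded == Reference);
  }
}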
+ bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath; unsigned Opc = Node->getOpcode(); switch (Opc) { case ISD::ATOMIC_FENCE: { @@ -3960,6 +4001,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -4026,33 +4068,68 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; case ISD::FLOG: case ISD::STRICT_FLOG: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, + RTLIB::LOG_FINITE_F64, + RTLIB::LOG_FINITE_F80, + RTLIB::LOG_FINITE_F128, + RTLIB::LOG_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: case ISD::STRICT_FLOG2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, + RTLIB::LOG2_FINITE_F64, + RTLIB::LOG2_FINITE_F80, + RTLIB::LOG2_FINITE_F128, + RTLIB::LOG2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: case ISD::STRICT_FLOG10: - Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, + RTLIB::LOG10_FINITE_F64, + RTLIB::LOG10_FINITE_F80, + RTLIB::LOG10_FINITE_F128, + RTLIB::LOG10_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: case ISD::STRICT_FEXP: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, + RTLIB::EXP_FINITE_F64, + RTLIB::EXP_FINITE_F80, + RTLIB::EXP_FINITE_F128, + RTLIB::EXP_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: case ISD::STRICT_FEXP2: - Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, + RTLIB::EXP2_FINITE_F64, + RTLIB::EXP2_FINITE_F80, + RTLIB::EXP2_FINITE_F128, + RTLIB::EXP2_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); break; case ISD::FTRUNC: Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, @@ -4098,9 +4175,16 @@ void 
SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; case ISD::FPOW: case ISD::STRICT_FPOW: - Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128)); + if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, + RTLIB::POW_FINITE_F64, + RTLIB::POW_FINITE_F80, + RTLIB::POW_FINITE_F128, + RTLIB::POW_FINITE_PPCF128)); + else + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, @@ -4184,10 +4268,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { // Replace the original node with the legalized result. if (!Results.empty()) { - DEBUG(dbgs() << "Successfully converted node to libcall\n"); + LLVM_DEBUG(dbgs() << "Successfully converted node to libcall\n"); ReplaceNode(Node, Results.data()); } else - DEBUG(dbgs() << "Could not convert node to libcall\n"); + LLVM_DEBUG(dbgs() << "Could not convert node to libcall\n"); } // Determine the vector type to use in place of an original scalar element when @@ -4201,7 +4285,7 @@ static MVT getPromotedVectorElementType(const TargetLowering &TLI, } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { - DEBUG(dbgs() << "Trying to promote node\n"); + LLVM_DEBUG(dbgs() << "Trying to promote node\n"); SmallVector<SDValue, 8> Results; MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || @@ -4254,7 +4338,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { ISD::SRL, dl, NVT, Tmp1, DAG.getConstant(DiffBits, dl, TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); - Results.push_back(Tmp1); + + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); break; } case ISD::FP_TO_UINT: @@ -4638,10 +4723,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // Replace the original node with the legalized result. if (!Results.empty()) { - DEBUG(dbgs() << "Successfully promoted node\n"); + LLVM_DEBUG(dbgs() << "Successfully promoted node\n"); ReplaceNode(Node, Results.data()); } else - DEBUG(dbgs() << "Could not promote node\n"); + LLVM_DEBUG(dbgs() << "Could not promote node\n"); } /// This is the entry point for the file. 
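// Sketch of the libcall selection pattern added above (plain C++; the
// symbol names are glibc's bits/math-finite.h entry points, the rest is
// simplified and illustrative): under no-NaNs plus no-infs, prefer the
// finite variant whenever the target's math library is known to provide it.
static const char *pickExpSymbol(bool NoNaNs, bool NoInfs, bool HasFinite) {
  if (NoNaNs && NoInfs && HasFinite)
    return "__exp_finite";
  return "exp";
}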
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e28a3aa47ca3..b0ae1e0399fb 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -47,8 +47,8 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { @@ -738,8 +738,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); switch (N->getOpcode()) { @@ -1039,7 +1039,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -1538,7 +1538,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, /// types of the node are known to be legal, but other operands of the node may /// need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom expand this node. @@ -1658,18 +1658,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on - // PPC (the libcall is not available). FIXME: Do this in a less hacky way. - if (RVT == MVT::i32) { - assert(N->getOperand(0).getValueType() == MVT::ppcf128 && - "Logic only correct for ppcf128!"); - SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, - N->getOperand(0), DAG.getValueType(MVT::f64)); - Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, - DAG.getIntPtrConstant(1, dl)); - return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); - } - RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; @@ -1679,31 +1667,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); - // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on - // PPC (the libcall is not available). FIXME: Do this in a less hacky way. 
- if (RVT == MVT::i32) { - assert(N->getOperand(0).getValueType() == MVT::ppcf128 && - "Logic only correct for ppcf128!"); - const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; - APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31)); - SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); - // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X - // FIXME: generated code sucks. - // TODO: Are there fast-math-flags to propagate to this FSUB? - return DAG.getSelectCC(dl, N->getOperand(0), Tmp, - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, - DAG.getNode(ISD::FSUB, dl, - MVT::ppcf128, - N->getOperand(0), - Tmp)), - DAG.getConstant(0x80000000, dl, - MVT::i32)), - DAG.getNode(ISD::FP_TO_SINT, dl, - MVT::i32, N->getOperand(0)), - ISD::SETGE); - } - RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), @@ -2139,13 +2102,12 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { // Load the value as an integer value with the same number of bits. EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - auto MMOFlags = - L->getMemOperand()->getFlags() & - ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT, - L->getAlignment(), MMOFlags, L->getAAInfo()); + L->getAlignment(), + L->getMemOperand()->getFlags(), + L->getAAInfo()); // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 29f0bb475b08..63a1ea13a5f5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -36,12 +36,13 @@ using namespace llvm; /// may also have invalid operands or may have other results that need /// expansion, we just know that (at least) one result needs promotion. void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom expand this node. 
if (CustomLowerNode(N, N->getValueType(ResNo), true)) { - DEBUG(dbgs() << "Node has been custom expanded, done\n"); + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); return; } @@ -146,6 +147,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -501,7 +503,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { SDLoc dl(N); SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), - N->getIndex()}; + N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); @@ -586,43 +588,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT SVT = getSetCCResultType(N->getOperand(0).getValueType()); - + EVT InVT = N->getOperand(0).getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - // Only use the result of getSetCCResultType if it is legal, - // otherwise just use the promoted result type (NVT). - if (!TLI.isTypeLegal(SVT)) - SVT = NVT; + EVT SVT = getSetCCResultType(InVT); + + // If we got back a type that needs to be promoted, this likely means + // the input type also needs to be promoted. So get the promoted type for + // the input and try the query again. + if (getTypeAction(SVT) == TargetLowering::TypePromoteInteger) { + if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { + InVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + SVT = getSetCCResultType(InVT); + } else { + // Input type isn't promoted, just use the default promoted type. + SVT = NVT; + } + } SDLoc dl(N); assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && "Vector compare must return a vector result!"); - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - if (LHS.getValueType() != RHS.getValueType()) { - if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger && - !LHS.getValueType().isVector()) - LHS = GetPromotedInteger(LHS); - if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger && - !RHS.getValueType().isVector()) - RHS = GetPromotedInteger(RHS); - } - // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, - N->getOperand(2)); + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), + N->getOperand(1), N->getOperand(2)); // Convert to the expected type. return DAG.getSExtOrTrunc(SetCC, dl, NVT); } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { - SDValue LHS = N->getOperand(0); + SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); - if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) - LHS = GetPromotedInteger(LHS); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); @@ -661,22 +659,18 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); // The input value must be properly sign extended.
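// Illustrative check (plain C++, assumes a two's-complement target) of why
// the promoted operand must be sign-extended for SRA but zero-extended for
// SRL: the high bits brought in by the wider type must match the narrow
// operation's semantics, or garbage would be shifted into the result.
#include <cassert>
#include <cstdint>
static void checkShiftPromotion() {
  int8_t V = -10;
  int32_t S = V;                        // sign-extended copy, feeds SRA
  uint32_t Z = static_cast<uint8_t>(V); // zero-extended copy, feeds SRL
  assert(static_cast<int8_t>(S >> 2) == static_cast<int8_t>(V >> 2));
  assert(static_cast<uint8_t>(Z >> 2) ==
         static_cast<uint8_t>(static_cast<uint8_t>(V) >> 2));
}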
- if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) - LHS = SExtPromotedInteger(LHS); + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); // The input value must be properly zero extended. - if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) - LHS = ZExtPromotedInteger(LHS); + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); @@ -904,11 +898,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { - DEBUG(dbgs() << "Node has been custom lowered, done\n"); + LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); return false; } @@ -1001,11 +996,11 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // than the width of NewLHS/NewRHS, we can avoid inserting a real truncate // instruction, which would eventually be redundant. (For example, an i32 // holding -5 has 29 sign bits, so its effective width is 32 - 29 + 1 = 4 // bits and a narrower compare is safe.) unsigned OpLEffectiveBits = - OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; + OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; unsigned OpREffectiveBits = - OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; - if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() && - OpREffectiveBits <= NewRHS.getValueSizeInBits()) { + OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; + if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && + OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) { NewLHS = OpL; NewRHS = OpR; } else { @@ -1356,7 +1351,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); + dbgs() << "\n"); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -1413,6 +1409,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -2893,7 +2890,8 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one.
bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -2915,7 +2913,6 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; - case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; @@ -3051,15 +3048,14 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } - // Lower with SETCCE or SETCCCARRY if the target supports it. + // Lower with SETCCCARRY if the target supports it. EVT HiVT = LHSHi.getValueType(); EVT ExpandVT = TLI.getTypeToExpandTo(*DAG.getContext(), HiVT); bool HasSETCCCARRY = TLI.isOperationLegalOrCustom(ISD::SETCCCARRY, ExpandVT); // FIXME: Make all targets support this, then remove the other lowering. - if (HasSETCCCARRY || - TLI.getOperationAction(ISD::SETCCE, ExpandVT) == TargetLowering::Custom) { - // SETCCE/SETCCCARRY can detect < and >= directly. For > and <=, flip + if (HasSETCCCARRY) { + // SETCCCARRY can detect < and >= directly. For > and <=, flip // operands and condition code. bool FlipOperands = false; switch (CCCode) { @@ -3074,17 +3070,15 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, std::swap(LHSHi, RHSHi); } // Perform a wide subtraction, feeding the carry from the low part into - // SETCCE/SETCCCARRY. The SETCCE/SETCCCARRY operation is essentially - // looking at the high part of the result of LHS - RHS. It is negative - // iff LHS < RHS. It is zero or positive iff LHS >= RHS. + // SETCCCARRY. The SETCCCARRY operation is essentially looking at the high + // part of the result of LHS - RHS. It is negative iff LHS < RHS. It is + // zero or positive iff LHS >= RHS. EVT LoVT = LHSLo.getValueType(); - SDVTList VTList = DAG.getVTList( - LoVT, HasSETCCCARRY ? getSetCCResultType(LoVT) : MVT::Glue); - SDValue LowCmp = DAG.getNode(HasSETCCCARRY ? ISD::USUBO : ISD::SUBC, dl, - VTList, LHSLo, RHSLo); - SDValue Res = DAG.getNode(HasSETCCCARRY ? ISD::SETCCCARRY : ISD::SETCCE, dl, - getSetCCResultType(HiVT), LHSHi, RHSHi, - LowCmp.getValue(1), DAG.getCondCode(CCCode)); + SDVTList VTList = DAG.getVTList(LoVT, getSetCCResultType(LoVT)); + SDValue LowCmp = DAG.getNode(ISD::USUBO, dl, VTList, LHSLo, RHSLo); + SDValue Res = DAG.getNode(ISD::SETCCCARRY, dl, getSetCCResultType(HiVT), + LHSHi, RHSHi, LowCmp.getValue(1), + DAG.getCondCode(CCCode)); NewLHS = Res; NewRHS = SDValue(); return; @@ -3152,24 +3146,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { DAG.UpdateNodeOperands(N, NewLHS, NewRHS, DAG.getCondCode(CCCode)), 0); } -SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - SDValue Carry = N->getOperand(2); - SDValue Cond = N->getOperand(3); - SDLoc dl = SDLoc(N); - - SDValue LHSLo, LHSHi, RHSLo, RHSHi; - GetExpandedInteger(LHS, LHSLo, LHSHi); - GetExpandedInteger(RHS, RHSLo, RHSHi); - - // Expand to a SUBE for the low part and a smaller SETCCE for the high. 
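The comment rewritten in IntegerExpandSetCCOperands above compresses a nice trick: a comparison on an expanded (double-width) integer can be lowered as a wide subtraction, with the borrow from the low halves (USUBO) fed into the high-half compare (SETCCCARRY), since the high part of LHS - RHS is negative iff LHS < RHS. A hedged plain-C++ model of that shape for unsigned less-than; the names are ours, not LLVM API, and SETCCCARRY covers the other predicates by flipping operands as the code notes:

#include <cassert>
#include <cstdint>

// Unsigned "less than" on a 64-bit value computed from 32-bit halves.
// The borrow from the low subtraction plays the role of the carry fed
// into SETCCCARRY.
static bool ultFromHalves(uint32_t LHSLo, uint32_t LHSHi,
                          uint32_t RHSLo, uint32_t RHSHi) {
  bool Borrow = LHSLo < RHSLo;               // carry out of USUBO
  uint64_t HiDiff = (uint64_t)LHSHi - RHSHi - Borrow;
  return (HiDiff >> 32) & 1;                 // borrow out of the wide sub
}

int main() {
  uint64_t A = 0x00000001FFFFFFFFULL, B = 0x0000000200000000ULL;
  assert(ultFromHalves((uint32_t)A, (uint32_t)(A >> 32),
                       (uint32_t)B, (uint32_t)(B >> 32)) == (A < B));
  assert(ultFromHalves((uint32_t)B, (uint32_t)(B >> 32),
                       (uint32_t)A, (uint32_t)(A >> 32)) == (B < A));
  return 0;
}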
- SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); - SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); - return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, - LowCmp.getValue(1), Cond); -} - SDValue DAGTypeLegalizer::ExpandIntOp_SETCCCARRY(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); @@ -3497,21 +3473,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { assert(NumElem * NumOperands == NumOutElem && "Unexpected number of elements"); - // If the input type is legal and we can promote it to a legal type with the - // same element size, go ahead do that to create a new concat. - if (getTypeAction(N->getOperand(0).getValueType()) == - TargetLowering::TypeLegal) { - EVT InPromotedTy = EVT::getVectorVT(*DAG.getContext(), OutElemTy, NumElem); - if (TLI.isTypeLegal(InPromotedTy)) { - SmallVector<SDValue, 8> Ops(NumOperands); - for (unsigned i = 0; i < NumOperands; ++i) { - Ops[i] = DAG.getNode(ISD::ANY_EXTEND, dl, InPromotedTy, - N->getOperand(i)); - } - return DAG.getNode(ISD::CONCAT_VECTORS, dl, NOutVT, Ops); - } - } - // Take the elements from the first vector. SmallVector<SDValue, 8> Ops(NumOutElem); for (unsigned i = 0; i < NumOperands; ++i) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 4438ee7878b8..a9f144c06e9a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -84,9 +84,11 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { SDValue Res(&Node, i); EVT VT = Res.getValueType(); bool Failed = false; + // Don't create a value in the map. + auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; unsigned Mapped = 0; - if (ReplacedValues.find(Res) != ReplacedValues.end()) { + if (ResId && (ReplacedValues.find(ResId) != ReplacedValues.end())) { Mapped |= 1; // Check that remapped values are only used by nodes marked NewNode. for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); @@ -97,30 +99,32 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // Check that the final result of applying ReplacedValues is not // marked NewNode.
- SDValue NewVal = ReplacedValues[Res]; - DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal); + auto NewValId = ReplacedValues[ResId]; + auto I = ReplacedValues.find(NewValId); while (I != ReplacedValues.end()) { - NewVal = I->second; - I = ReplacedValues.find(NewVal); + NewValId = I->second; + I = ReplacedValues.find(NewValId); } + SDValue NewVal = getSDValue(NewValId); + (void)NewVal; assert(NewVal.getNode()->getNodeId() != NewNode && "ReplacedValues maps to a new node!"); } - if (PromotedIntegers.find(Res) != PromotedIntegers.end()) + if (ResId && PromotedIntegers.find(ResId) != PromotedIntegers.end()) Mapped |= 2; - if (SoftenedFloats.find(Res) != SoftenedFloats.end()) + if (ResId && SoftenedFloats.find(ResId) != SoftenedFloats.end()) Mapped |= 4; - if (ScalarizedVectors.find(Res) != ScalarizedVectors.end()) + if (ResId && ScalarizedVectors.find(ResId) != ScalarizedVectors.end()) Mapped |= 8; - if (ExpandedIntegers.find(Res) != ExpandedIntegers.end()) + if (ResId && ExpandedIntegers.find(ResId) != ExpandedIntegers.end()) Mapped |= 16; - if (ExpandedFloats.find(Res) != ExpandedFloats.end()) + if (ResId && ExpandedFloats.find(ResId) != ExpandedFloats.end()) Mapped |= 32; - if (SplitVectors.find(Res) != SplitVectors.end()) + if (ResId && SplitVectors.find(ResId) != SplitVectors.end()) Mapped |= 64; - if (WidenedVectors.find(Res) != WidenedVectors.end()) + if (ResId && WidenedVectors.find(ResId) != WidenedVectors.end()) Mapped |= 128; - if (PromotedFloats.find(Res) != PromotedFloats.end()) + if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end()) Mapped |= 256; if (Node.getNodeId() != Processed) { @@ -224,9 +228,9 @@ bool DAGTypeLegalizer::run() { assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); - DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG)); + LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG)); if (IgnoreNodeResults(N)) { - DEBUG(dbgs() << "Ignoring node results\n"); + LLVM_DEBUG(dbgs() << "Ignoring node results\n"); goto ScanOperands; } @@ -234,11 +238,11 @@ bool DAGTypeLegalizer::run() { // types are illegal. 
for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { EVT ResultVT = N->getValueType(i); - DEBUG(dbgs() << "Analyzing result type: " << - ResultVT.getEVTString() << "\n"); + LLVM_DEBUG(dbgs() << "Analyzing result type: " << ResultVT.getEVTString() + << "\n"); switch (getTypeAction(ResultVT)) { case TargetLowering::TypeLegal: - DEBUG(dbgs() << "Legal result type\n"); + LLVM_DEBUG(dbgs() << "Legal result type\n"); break; // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results @@ -296,11 +300,11 @@ ScanOperands: continue; const auto Op = N->getOperand(i); - DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG)); + LLVM_DEBUG(dbgs() << "Analyzing operand: "; Op.dump(&DAG)); EVT OpVT = Op.getValueType(); switch (getTypeAction(OpVT)) { case TargetLowering::TypeLegal: - DEBUG(dbgs() << "Legal operand\n"); + LLVM_DEBUG(dbgs() << "Legal operand\n"); continue; // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's @@ -370,7 +374,8 @@ ScanOperands: } if (i == NumOperands) { - DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); + dbgs() << "\n"); } } NodeDone: @@ -490,9 +495,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) return N; - // Remove any stale map entries. - ExpungeNode(N); - // Okay, we know that this node is new. Recursively walk all of its operands // to see if they are new also. The depth of this walk is bounded by the size // of the new tree that was constructed (usually 2-3 nodes), so we don't worry @@ -543,7 +545,6 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { // to remap the operands, since they are the same as the operands we // remapped above. N = M; - ExpungeNode(N); } } @@ -564,100 +565,25 @@ void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { RemapValue(Val); } -/// If N has a bogus mapping in ReplacedValues, eliminate it. -/// This can occur when a node is deleted then reallocated as a new node - -/// the mapping in ReplacedValues applies to the deleted node, not the new -/// one. -/// The only map that can have a deleted node as a source is ReplacedValues. -/// Other maps can have deleted nodes as targets, but since their looked-up -/// values are always immediately remapped using RemapValue, resulting in a -/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue -/// always performs correct mappings. In order to keep the mapping correct, -/// ExpungeNode should be called on any new nodes *before* adding them as -/// either source or target to ReplacedValues (which typically means calling -/// Expunge when a new node is first seen, since it may no longer be marked -/// NewNode by the time it is added to ReplacedValues). -void DAGTypeLegalizer::ExpungeNode(SDNode *N) { - if (N->getNodeId() != NewNode) - return; - - // If N is not remapped by ReplacedValues then there is nothing to do. - unsigned i, e; - for (i = 0, e = N->getNumValues(); i != e; ++i) - if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) - break; - - if (i == e) - return; - - // Remove N from all maps - this is expensive but rare. 
- - for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), - E = PromotedIntegers.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second); - } - - for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), - E = SoftenedFloats.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second); - } - - for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), - E = ScalarizedVectors.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second); - } - - for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), - E = WidenedVectors.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second); - } - - for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator - I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ - assert(I->first.getNode() != N); - RemapValue(I->second.first); - RemapValue(I->second.second); - } - - for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator - I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second.first); - RemapValue(I->second.second); - } - - for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator - I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { - assert(I->first.getNode() != N); - RemapValue(I->second.first); - RemapValue(I->second.second); - } - - for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), - E = ReplacedValues.end(); I != E; ++I) - RemapValue(I->second); - - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - ReplacedValues.erase(SDValue(N, i)); -} - /// If the specified value was already legalized to another value, /// replace it by that value. -void DAGTypeLegalizer::RemapValue(SDValue &N) { - DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); +void DAGTypeLegalizer::RemapValue(SDValue &V) { + auto Id = getTableId(V); + V = getSDValue(Id); +} + +void DAGTypeLegalizer::RemapId(TableId &Id) { + auto I = ReplacedValues.find(Id); if (I != ReplacedValues.end()) { + assert(Id != I->second && "Id is mapped to itself."); // Use path compression to speed up future lookups if values get multiply // replaced with other values. - RemapValue(I->second); - N = I->second; + RemapId(I->second); + Id = I->second; - // Note that it is possible to have N.getNode()->getNodeId() == NewNode at - // this point because it is possible for a node to be put in the map before - // being processed. + // Note that with N = IdToValueMap[Id] it is possible to have + // N.getNode()->getNodeId() == NewNode at this point because it is possible + // for a node to be put in the map before being processed. } } @@ -714,19 +640,22 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { assert(From.getNode() != To.getNode() && "Potential legalization loop!"); // If expansion produced new nodes, make sure they are properly marked. - ExpungeNode(From.getNode()); - AnalyzeNewValue(To); // Expunges To. + AnalyzeNewValue(To); // Anything that used the old node should now use the new one. Note that this // can potentially cause recursive merging. SmallSetVector<SDNode*, 16> NodesToAnalyze; NodeUpdateListener NUL(*this, NodesToAnalyze); do { - DAG.ReplaceAllUsesOfValueWith(From, To); - // The old node may still be present in a map like ExpandedIntegers or - // PromotedIntegers. Inform maps about the replacement.
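RemapId above is the find step of a union-find structure: chase the ReplacedValues chain to its final id, then compress the path so future lookups are effectively constant time. A self-contained sketch of just that mechanism, with a plain map standing in for ReplacedValues:

#include <cassert>
#include <unordered_map>

using TableId = unsigned;

// Follow Id -> Replaced[Id] chains to the root, rewriting every entry
// visited to point directly at the root (path compression).
static TableId remapId(std::unordered_map<TableId, TableId> &Replaced,
                       TableId Id) {
  auto I = Replaced.find(Id);
  if (I == Replaced.end())
    return Id;                               // not replaced: unchanged
  TableId Root = remapId(Replaced, I->second); // recurse to the end
  I->second = Root;                            // compress the path
  return Root;
}

int main() {
  std::unordered_map<TableId, TableId> Replaced = {{1, 2}, {2, 3}, {3, 4}};
  assert(remapId(Replaced, 1) == 4);
  assert(Replaced[1] == 4 && Replaced[2] == 4); // chain collapsed
  return 0;
}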
- ReplacedValues[From] = To; + // The old node may be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + auto FromId = getTableId(From); + auto ToId = getTableId(To); + + if (FromId != ToId) + ReplacedValues[FromId] = ToId; + DAG.ReplaceAllUsesOfValueWith(From, To); // Process the list of nodes that need to be reanalyzed. while (!NodesToAnalyze.empty()) { @@ -751,12 +680,15 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { SDValue NewVal(M, i); if (M->getNodeId() == Processed) RemapValue(NewVal); - DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal); // OldVal may be a target of the ReplacedValues map which was marked // NewNode to force reanalysis because it was updated. Ensure that // anything that ReplacedValues mapped to OldVal will now be mapped // all the way to NewVal. - ReplacedValues[OldVal] = NewVal; + auto OldValId = getTableId(OldVal); + auto NewValId = getTableId(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal); + if (OldValId != NewValId) + ReplacedValues[OldValId] = NewValId; } // The original node continues to exist in the DAG, marked NewNode. } @@ -773,9 +705,11 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { "Invalid type for promoted integer"); AnalyzeNewValue(Result); - SDValue &OpEntry = PromotedIntegers[Op]; - assert(!OpEntry.getNode() && "Node is already promoted!"); - OpEntry = Result; + auto &OpIdEntry = PromotedIntegers[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node is already promoted!"); + OpIdEntry = getTableId(Result); + + DAG.transferDbgValues(Op, Result); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -788,15 +722,15 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { "Invalid type for softened float"); AnalyzeNewValue(Result); - SDValue &OpEntry = SoftenedFloats[Op]; + auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; // Allow repeated calls to save f128 type nodes // or any node with type that transforms to itself. // Many operations on these types are not softened. 
- assert((!OpEntry.getNode()|| + assert(((OpIdEntry == 0) || Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Node is already converted to integer!"); - OpEntry = Result; + OpIdEntry = getTableId(Result); } void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { @@ -805,9 +739,9 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { "Invalid type for promoted float"); AnalyzeNewValue(Result); - SDValue &OpEntry = PromotedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already promoted!"); - OpEntry = Result; + auto &OpIdEntry = PromotedFloats[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node is already promoted!"); + OpIdEntry = getTableId(Result); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -818,19 +752,17 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { "Invalid type for scalarized vector"); AnalyzeNewValue(Result); - SDValue &OpEntry = ScalarizedVectors[Op]; - assert(!OpEntry.getNode() && "Node is already scalarized!"); - OpEntry = Result; + auto &OpIdEntry = ScalarizedVectors[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node is already scalarized!"); + OpIdEntry = getTableId(Result); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi) { - std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; - RemapValue(Entry.first); - RemapValue(Entry.second); - assert(Entry.first.getNode() && "Operand isn't expanded"); - Lo = Entry.first; - Hi = Entry.second; + std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)]; + assert((Entry.first != 0) && "Operand isn't expanded"); + Lo = getSDValue(Entry.first); + Hi = getSDValue(Entry.second); } void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, @@ -856,20 +788,18 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, } // Remember that this is the result of the node. - std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; - assert(!Entry.first.getNode() && "Node already expanded"); - Entry.first = Lo; - Entry.second = Hi; + std::pair<TableId, TableId> &Entry = ExpandedIntegers[getTableId(Op)]; + assert((Entry.first == 0) && "Node already expanded"); + Entry.first = getTableId(Lo); + Entry.second = getTableId(Hi); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi) { - std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; - RemapValue(Entry.first); - RemapValue(Entry.second); - assert(Entry.first.getNode() && "Operand isn't expanded"); - Lo = Entry.first; - Hi = Entry.second; + std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)]; + assert((Entry.first != 0) && "Operand isn't expanded"); + Lo = getSDValue(Entry.first); + Hi = getSDValue(Entry.second); } void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, @@ -882,21 +812,19 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, AnalyzeNewValue(Lo); AnalyzeNewValue(Hi); - // Remember that this is the result of the node. 
- std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; - assert(!Entry.first.getNode() && "Node already expanded"); - Entry.first = Lo; - Entry.second = Hi; + std::pair<TableId, TableId> &Entry = ExpandedFloats[getTableId(Op)]; + assert((Entry.first == 0) && "Node already expanded"); + Entry.first = getTableId(Lo); + Entry.second = getTableId(Hi); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi) { - std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; - RemapValue(Entry.first); - RemapValue(Entry.second); - assert(Entry.first.getNode() && "Operand isn't split"); - Lo = Entry.first; - Hi = Entry.second; + std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)]; + Lo = getSDValue(Entry.first); + Hi = getSDValue(Entry.second); + assert(Lo.getNode() && "Operand isn't split"); } void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, @@ -912,10 +840,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, AnalyzeNewValue(Hi); // Remember that this is the result of the node. - std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; - assert(!Entry.first.getNode() && "Node already split"); - Entry.first = Lo; - Entry.second = Hi; + std::pair<TableId, TableId> &Entry = SplitVectors[getTableId(Op)]; + assert((Entry.first == 0) && "Node already split"); + Entry.first = getTableId(Lo); + Entry.second = getTableId(Hi); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -924,9 +852,9 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { "Invalid type for widened vector"); AnalyzeNewValue(Result); - SDValue &OpEntry = WidenedVectors[Op]; - assert(!OpEntry.getNode() && "Node already widened!"); - OpEntry = Result; + auto &OpIdEntry = WidenedVectors[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node already widened!"); + OpIdEntry = getTableId(Result); } @@ -1064,11 +992,11 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), LVT.getSizeInBits() + HVT.getSizeInBits()); + EVT ShiftAmtVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout(), false); Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, - DAG.getConstant(LVT.getSizeInBits(), dlHi, - TLI.getPointerTy(DAG.getDataLayout()))); + DAG.getConstant(LVT.getSizeInBits(), dlHi, ShiftAmtVT)); return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 64cb80e0d853..2c6b1ee7900f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -93,46 +93,81 @@ private: N->getOpcode() == ISD::Register; } + // Bijection from SDValue to unique id. As each created node gets a + // new id we do not need to worry about expunging stale entries when + // node memory is reused. Should we run out of ids, we can do a one-time + // expensive compactification. + typedef unsigned TableId; + + TableId NextValueId = 1; + + SmallDenseMap<SDValue, TableId, 8> ValueToIdMap; + SmallDenseMap<TableId, SDValue, 8> IdToValueMap; + /// For integer nodes that are below legal width, this map indicates what /// promoted value to use. - SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers; + SmallDenseMap<TableId, TableId, 8> PromotedIntegers; /// For integer nodes that need to be expanded this map indicates which /// operands are the expanded version of the input.
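The JoinIntegers hunk closing the chunk above only changes the type of the shift-amount constant (the target's shift-amount type instead of the pointer type); the recipe itself is unchanged: zero-extend the low half, any-extend the high half, shift the high half left by the low half's width, and OR. A fixed-width illustration, assuming 32-bit halves joined into 64 bits:

#include <cassert>
#include <cstdint>

static uint64_t joinIntegers(uint32_t Lo, uint32_t Hi) {
  uint64_t WideLo = (uint64_t)Lo; // ZERO_EXTEND
  uint64_t WideHi = (uint64_t)Hi; // ANY_EXTEND (zext is one valid choice)
  WideHi <<= 32;                  // SHL by LVT.getSizeInBits()
  return WideLo | WideHi;         // OR
}

int main() {
  assert(joinIntegers(0xDDCCBBAA, 0x11223344) == 0x11223344DDCCBBAAULL);
  return 0;
}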
- SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers; + SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedIntegers; /// For floating-point nodes converted to integers of the same size, this map /// indicates the converted value to use. - SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; + SmallDenseMap<TableId, TableId, 8> SoftenedFloats; /// For floating-point nodes that have a smaller precision than the smallest /// supported precision, this map indicates what promoted value to use. - SmallDenseMap<SDValue, SDValue, 8> PromotedFloats; + SmallDenseMap<TableId, TableId, 8> PromotedFloats; /// For float nodes that need to be expanded this map indicates which operands /// are the expanded version of the input. - SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; + SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats; /// For nodes that are <1 x ty>, this map indicates the scalar value of type /// 'ty' to use. - SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors; + SmallDenseMap<TableId, TableId, 8> ScalarizedVectors; /// For nodes that need to be split this map indicates which operands are the /// expanded version of the input. - SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors; + SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> SplitVectors; /// For vector nodes that need to be widened, indicates the widened value to /// use. - SmallDenseMap<SDValue, SDValue, 8> WidenedVectors; + SmallDenseMap<TableId, TableId, 8> WidenedVectors; /// For values that have been replaced with another, indicates the replacement /// value to use. - SmallDenseMap<SDValue, SDValue, 8> ReplacedValues; + SmallDenseMap<TableId, TableId, 8> ReplacedValues; /// This defines a worklist of nodes to process. In order to be pushed onto /// this worklist, all operands of a node must have already been processed. SmallVector<SDNode*, 128> Worklist; + TableId getTableId(SDValue V) { + assert(V.getNode() && "Getting TableId on SDValue()"); + + auto I = ValueToIdMap.find(V); + if (I != ValueToIdMap.end()) { + // Remap the id in case the value was replaced. + RemapId(I->second); + assert(I->second && "All Ids should be nonzero"); + return I->second; + } + // Add if it's not there. + ValueToIdMap.insert(std::make_pair(V, NextValueId)); + IdToValueMap.insert(std::make_pair(NextValueId, V)); + ++NextValueId; + assert(NextValueId != 0 && + "Ran out of Ids. Increase id type size or add compactification"); + return NextValueId - 1; + } + + const SDValue &getSDValue(TableId &Id) { + RemapId(Id); + assert(Id && "TableId should be non-zero"); + return IdToValueMap[Id]; + } + public: explicit DAGTypeLegalizer(SelectionDAG &dag) : TLI(dag.getTargetLoweringInfo()), DAG(dag), @@ -147,10 +182,25 @@ public: bool run(); void NoteDeletion(SDNode *Old, SDNode *New) { - ExpungeNode(Old); - ExpungeNode(New); - for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) - ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { + TableId NewId = getTableId(SDValue(New, i)); + TableId OldId = getTableId(SDValue(Old, i)); + + if (OldId != NewId) + ReplacedValues[OldId] = NewId; + + // Delete Node from tables.
+ ValueToIdMap.erase(SDValue(Old, i)); + IdToValueMap.erase(OldId); + PromotedIntegers.erase(OldId); + ExpandedIntegers.erase(OldId); + SoftenedFloats.erase(OldId); + PromotedFloats.erase(OldId); + ExpandedFloats.erase(OldId); + ScalarizedVectors.erase(OldId); + SplitVectors.erase(OldId); + WidenedVectors.erase(OldId); + } } SelectionDAG &getDAG() const { return DAG; } @@ -158,9 +208,9 @@ public: private: SDNode *AnalyzeNewNode(SDNode *N); void AnalyzeNewValue(SDValue &Val); - void ExpungeNode(SDNode *N); void PerformExpensiveChecks(); - void RemapValue(SDValue &N); + void RemapId(TableId &Id); + void RemapValue(SDValue &V); // Common routines. SDValue BitConvertToInteger(SDValue Op); @@ -207,8 +257,8 @@ private: /// returns an i32, the lower 16 bits of which coincide with Op, and the upper /// 16 bits of which contain rubbish. SDValue GetPromotedInteger(SDValue Op) { - SDValue &PromotedOp = PromotedIntegers[Op]; - RemapValue(PromotedOp); + TableId &PromotedId = PromotedIntegers[getTableId(Op)]; + SDValue PromotedOp = getSDValue(PromotedId); assert(PromotedOp.getNode() && "Operand wasn't promoted?"); return PromotedOp; } @@ -282,7 +332,7 @@ private: SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); // Integer Operand Promotion. - bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + bool PromoteIntegerOperand(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N); SDValue PromoteIntOp_BITCAST(SDNode *N); @@ -373,11 +423,10 @@ private: bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); // Integer Operand Expansion. - bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + bool ExpandIntegerOperand(SDNode *N, unsigned OpNo); SDValue ExpandIntOp_BR_CC(SDNode *N); SDValue ExpandIntOp_SELECT_CC(SDNode *N); SDValue ExpandIntOp_SETCC(SDNode *N); - SDValue ExpandIntOp_SETCCE(SDNode *N); SDValue ExpandIntOp_SETCCCARRY(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); @@ -403,16 +452,15 @@ private: /// stay in a register, the Op is not converted to an integer. /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { - auto Iter = SoftenedFloats.find(Op); + TableId Id = getTableId(Op); + auto Iter = SoftenedFloats.find(Id); if (Iter == SoftenedFloats.end()) { assert(isSimpleLegalType(Op.getValueType()) && "Operand wasn't converted to integer?"); return Op; } - - SDValue &SoftenedOp = Iter->second; + SDValue SoftenedOp = getSDValue(Iter->second); assert(SoftenedOp.getNode() && "Unconverted op in SoftenedFloats?"); - RemapValue(SoftenedOp); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); @@ -531,7 +579,7 @@ private: void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue &Hi); // Float Operand Expansion. 
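To make the id scheme concrete: values are interned into the ValueToIdMap/IdToValueMap pair, and reads consult ReplacedValues first, so a stale id transparently resolves to whatever replaced it; that is exactly what NoteDeletion records when a deleted node's values are taken over by another node. A miniature, hedged model (std::string stands in for SDValue; path compression and table erasure are omitted):

#include <cassert>
#include <map>
#include <string>

struct IdTable {
  std::map<std::string, unsigned> ValueToId;
  std::map<unsigned, std::string> IdToValue;
  std::map<unsigned, unsigned> Replaced;
  unsigned NextId = 1; // 0 is reserved as "no entry"

  // Intern a value, handing out monotonically increasing ids.
  unsigned getId(const std::string &V) {
    auto I = ValueToId.find(V);
    if (I != ValueToId.end())
      return I->second;
    ValueToId[V] = NextId;
    IdToValue[NextId] = V;
    return NextId++;
  }

  // Resolve an id through the replacement table before reading.
  const std::string &getValue(unsigned Id) {
    while (Replaced.count(Id))
      Id = Replaced[Id];
    return IdToValue[Id];
  }
};

int main() {
  IdTable T;
  unsigned Old = T.getId("old-node");
  unsigned New = T.getId("new-node");
  T.Replaced[Old] = New;                 // what NoteDeletion records
  assert(T.getValue(Old) == "new-node"); // stale id resolves forward
  return 0;
}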
- bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + bool ExpandFloatOperand(SDNode *N, unsigned OpNo); SDValue ExpandFloatOp_BR_CC(SDNode *N); SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); @@ -549,8 +597,8 @@ private: //===--------------------------------------------------------------------===// SDValue GetPromotedFloat(SDValue Op) { - SDValue &PromotedOp = PromotedFloats[Op]; - RemapValue(PromotedOp); + TableId &PromotedId = PromotedFloats[getTableId(Op)]; + SDValue PromotedOp = getSDValue(PromotedId); assert(PromotedOp.getNode() && "Operand wasn't promoted?"); return PromotedOp; } @@ -572,7 +620,7 @@ private: SDValue PromoteFloatRes_UNDEF(SDNode *N); SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); - bool PromoteFloatOperand(SDNode *N, unsigned ResNo); + bool PromoteFloatOperand(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); @@ -589,15 +637,15 @@ private: /// element type, this returns the element. For example, if Op is a v1i32, /// Op = < i32 val >, this method returns val, an i32. SDValue GetScalarizedVector(SDValue Op) { - SDValue &ScalarizedOp = ScalarizedVectors[Op]; - RemapValue(ScalarizedOp); + TableId &ScalarizedId = ScalarizedVectors[getTableId(Op)]; + SDValue ScalarizedOp = getSDValue(ScalarizedId); assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); return ScalarizedOp; } void SetScalarizedVector(SDValue Op, SDValue Result); // Vector Result Scalarization: <1 x ty> -> ty. - void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + void ScalarizeVectorResult(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); SDValue ScalarizeVecRes_BinOp(SDNode *N); SDValue ScalarizeVecRes_TernaryOp(SDNode *N); @@ -646,13 +694,14 @@ private: void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
- void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVectorResult(SDNode *N, unsigned ResNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -662,9 +711,9 @@ private: void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi); void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, @@ -684,7 +733,7 @@ private: SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); - SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo); SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); SDValue SplitVecOp_VSETCC(SDNode *N); SDValue SplitVecOp_FP_ROUND(SDNode *N); @@ -701,8 +750,8 @@ private: /// method returns a v4i32 for which the first two elements are the same as /// those of Op, while the last two elements contain rubbish. SDValue GetWidenedVector(SDValue Op) { - SDValue &WidenedOp = WidenedVectors[Op]; - RemapValue(WidenedOp); + TableId &WidenedId = WidenedVectors[getTableId(Op)]; + SDValue WidenedOp = getSDValue(WidenedId); assert(WidenedOp.getNode() && "Operand wasn't widened?"); return WidenedOp; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 993465ae9dc2..df3134828af5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -300,6 +300,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); + Chain = Hi.getValue(1); // Handle endianness of the load. if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout())) @@ -307,7 +308,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Modified the chain - switch anything that used the old chain to use // the new one. - ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); + ReplaceValueWith(SDValue(N, 1), Chain); } @@ -384,7 +385,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { // Build a vector of twice the length out of the expanded elements. 
// For example <3 x i64> -> <6 x i32>. - std::vector<SDValue> NewElts; + SmallVector<SDValue, 16> NewElts; NewElts.reserve(NumElts*2); for (unsigned i = 0; i < NumElts; ++i) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 7643790df350..67928d4bdbd5 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -32,7 +32,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -41,6 +40,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include <cassert> #include <cstdint> @@ -63,7 +63,7 @@ class VectorLegalizer { /// legalizing the same thing more than once. SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; - /// \brief Adds a node to the translation cache. + /// Adds a node to the translation cache. void AddLegalizedOperand(SDValue From, SDValue To) { LegalizedNodes.insert(std::make_pair(From, To)); // If someone requests legalization of the new node, return itself. @@ -71,55 +71,55 @@ class VectorLegalizer { LegalizedNodes.insert(std::make_pair(To, To)); } - /// \brief Legalizes the given node. + /// Legalizes the given node. SDValue LegalizeOp(SDValue Op); - /// \brief Assuming the node is legal, "legalize" the results. + /// Assuming the node is legal, "legalize" the results. SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); - /// \brief Implements unrolling a VSETCC. + /// Implements unrolling a VSETCC. SDValue UnrollVSETCC(SDValue Op); - /// \brief Implement expand-based legalization of vector operations. + /// Implement expand-based legalization of vector operations. /// /// This is just a high-level routine to dispatch to specific code paths for /// operations to legalize them. SDValue Expand(SDValue Op); - /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if + /// Implements expansion for FNEG; falls back to UnrollVectorOp if /// FSUB isn't legal. /// /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if /// SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); - /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); - /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. + /// Implement expansion for ANY_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place and bitcasts to the proper /// type. The contents of the bits in the extended part of each element are /// undef. SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); - /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. + /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. /// /// Shuffles the low lanes of the operand into place, bitcasts to the proper /// type, then shifts left and arithmetic shifts right to introduce a sign /// extension. SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); - /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. 
/// /// Shuffles the low lanes of the operand into place and blends zeros into /// the remaining lanes, finally bitcasting to the proper type. SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); - /// \brief Expand bswap of vectors into a shuffle if legal. + /// Expand bswap of vectors into a shuffle if legal. SDValue ExpandBSWAP(SDValue Op); - /// \brief Implement vselect in terms of XOR, AND, OR when blend is not + /// Implement vselect in terms of XOR, AND, OR when blend is not /// supported by the target. SDValue ExpandVSELECT(SDValue Op); SDValue ExpandSELECT(SDValue Op); @@ -130,29 +130,30 @@ class VectorLegalizer { SDValue ExpandBITREVERSE(SDValue Op); SDValue ExpandCTLZ(SDValue Op); SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op); - - /// \brief Implements vector promotion. + SDValue ExpandStrictFPOp(SDValue Op); + + /// Implements vector promotion. /// /// This is essentially just bitcasting the operands to a different type and /// bitcasting the result back to the original type. SDValue Promote(SDValue Op); - /// \brief Implements [SU]INT_TO_FP vector promotion. + /// Implements [SU]INT_TO_FP vector promotion. /// - /// This is a [zs]ext of the input operand to the next size up. + /// This is a [zs]ext of the input operand to a larger integer type. SDValue PromoteINT_TO_FP(SDValue Op); - /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. + /// Implements FP_TO_[SU]INT vector promotion of the result type. /// - /// It is promoted to the next size up integer type. The result is then + /// It is promoted to a larger integer type. The result is then /// truncated back to the original type. - SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned); + SDValue PromoteFP_TO_INT(SDValue Op); public: VectorLegalizer(SelectionDAG& dag) : DAG(dag), TLI(dag.getTargetLoweringInfo()) {} - /// \brief Begin legalizer the vector operations in the DAG. + /// Begin legalizing the vector operations in the DAG.
bool Run(); }; @@ -222,14 +223,16 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { for (const SDValue &Op : Node->op_values()) Ops.push_back(LegalizeOp(Op)); - SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); + SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), + Op.getResNo()); bool HasVectorValue = false; if (Op.getOpcode() == ISD::LOAD) { LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); ISD::LoadExtType ExtType = LD->getExtensionType(); if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) { - DEBUG(dbgs() << "\nLegalizing extending vector load: "; Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing extending vector load: "; + Node->dump(&DAG)); switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), LD->getMemoryVT())) { default: llvm_unreachable("This action is not supported yet!"); @@ -261,8 +264,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { EVT StVT = ST->getMemoryVT(); MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) { - DEBUG(dbgs() << "\nLegalizing truncating vector store: "; - Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing truncating vector store: "; + Node->dump(&DAG)); switch (TLI.getTruncStoreAction(ValVT, StVT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: @@ -287,10 +290,34 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (!HasVectorValue) return TranslateLegalizeResults(Op, Result); - EVT QueryType; + TargetLowering::LegalizeAction Action = TargetLowering::Legal; switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + // These pseudo-ops get legalized as if they were their non-strict + // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT + // is also legal, but if ISD::FSQRT requires expansion then so does + // ISD::STRICT_FSQRT. 
+ Action = TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)); + break; case ISD::ADD: case ISD::SUB: case ISD::MUL: @@ -366,42 +393,47 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UMAX: case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: - QueryType = Node->getValueType(0); + case ISD::FCANONICALIZE: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); break; case ISD::FP_ROUND_INREG: - QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), + cast<VTSDNode>(Node->getOperand(1))->getVT()); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - QueryType = Node->getOperand(0).getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); break; case ISD::MSCATTER: - QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), + cast<MaskedScatterSDNode>(Node)->getValue().getValueType()); break; case ISD::MSTORE: - QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType(); + Action = TLI.getOperationAction(Node->getOpcode(), + cast<MaskedStoreSDNode>(Node)->getValue().getValueType()); break; } - DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); - switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + switch (Action) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Promote: Result = Promote(Op); Changed = true; break; case TargetLowering::Legal: - DEBUG(dbgs() << "Legal node: nothing to do\n"); + LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); break; case TargetLowering::Custom: { - DEBUG(dbgs() << "Trying custom legalization\n"); + LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); if (SDValue Tmp1 = TLI.LowerOperation(Op, DAG)) { - DEBUG(dbgs() << "Successfully custom legalized node\n"); + LLVM_DEBUG(dbgs() << "Successfully custom legalized node\n"); Result = Tmp1; break; } - DEBUG(dbgs() << "Could not custom legalize node\n"); + LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); LLVM_FALLTHROUGH; } case TargetLowering::Expand: @@ -431,7 +463,7 @@ SDValue VectorLegalizer::Promote(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: // Promote the operation by extending the operand. - return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + return PromoteFP_TO_INT(Op); } // There are currently two cases of vector promotion: @@ -472,20 +504,11 @@ SDValue VectorLegalizer::Promote(SDValue Op) { SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // INT_TO_FP operations may require the input operand be promoted even // when the type is otherwise legal. - EVT VT = Op.getOperand(0).getValueType(); - assert(Op.getNode()->getNumValues() == 1 && - "Can't promote a vector with multiple results!"); - - // Normal getTypeToPromoteTo() doesn't work here, as that will promote - // by widening the vector w/ the same element width and twice the number - // of elements. We want the other way around, the same number of elements, - // each twice the width. - // - // Increase the bitwidth of the element to the next pow-of-two - // (which is greater than 8 bits). 
+ MVT VT = Op.getOperand(0).getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && + "Vectors have different number of elements!"); - EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext()); - assert(NVT.isSimple() && "Promoting to a non-simple vector type!"); SDLoc dl(Op); SmallVector<SDValue, 4> Operands(Op.getNumOperands()); @@ -505,35 +528,28 @@ SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // elements and then truncate the result. This is different from the default // PromoteVector which uses bitcast to promote thus assuming that the // promoted vector type has the same overall size. -SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { - assert(Op.getNode()->getNumValues() == 1 && - "Can't promote a vector with multiple results!"); - EVT VT = Op.getValueType(); +SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op) { + MVT VT = Op.getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && + "Vectors have different number of elements!"); - EVT NewVT = VT; - unsigned NewOpc; - while (true) { - NewVT = NewVT.widenIntegerVectorElementType(*DAG.getContext()); - assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { - NewOpc = ISD::FP_TO_SINT; - break; - } - if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) { - NewOpc = ISD::FP_TO_UINT; - break; - } - } + unsigned NewOpc = Op->getOpcode(); + // Change FP_TO_UINT to FP_TO_SINT if possible. + // TODO: Should we only do this if FP_TO_UINT itself isn't legal? + if (NewOpc == ISD::FP_TO_UINT && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; SDLoc dl(Op); - SDValue Promoted = DAG.getNode(NewOpc, dl, NewVT, Op.getOperand(0)); + SDValue Promoted = DAG.getNode(NewOpc, dl, NVT, Op.getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (e.g. because the value being converted is too big), then the result of the // original operation was undefined anyway, so the assert is still correct. Promoted = DAG.getNode(Op->getOpcode() == ISD::FP_TO_UINT ? ISD::AssertZext : ISD::AssertSext, - dl, NewVT, Promoted, + dl, NVT, Promoted, DAG.getValueType(VT.getScalarType())); return DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); } @@ -665,9 +681,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Value = DAG.getBuildVector(Op.getNode()->getValueType(0), dl, Vals); } else { SDValue Scalarized = TLI.scalarizeVectorLoad(LD, DAG); - - NewChain = Scalarized.getValue(1); - Value = Scalarized.getValue(0); + // Skip past the MERGE_VALUES node if present. + if (Scalarized->getOpcode() == ISD::MERGE_VALUES) { + NewChain = Scalarized.getOperand(1); + Value = Scalarized.getOperand(0); + } else { + NewChain = Scalarized.getValue(1); + Value = Scalarized.getValue(0); + } } AddLegalizedOperand(Op.getValue(0), Value); @@ -678,35 +699,6 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { SDValue VectorLegalizer::ExpandStore(SDValue Op) { StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); - - EVT StVT = ST->getMemoryVT(); - EVT MemSclVT = StVT.getScalarType(); - unsigned ScalarSize = MemSclVT.getSizeInBits(); - - // Round odd types to the next pow of two. - if (!isPowerOf2_32(ScalarSize)) { - // FIXME: This is completely broken and inconsistent with ExpandLoad - // handling.
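The rewritten PromoteFP_TO_INT rests on a simple identity: convert to a vector with wider integer elements, then truncate back; and FP_TO_UINT may be rewritten as FP_TO_SINT once the wider signed type covers the whole unsigned range. The AssertZext/AssertSext merely records that out-of-range inputs were already undefined for the original node. A scalar sketch of the idea, using illustrative f32 -> u16 via i32 widths:

#include <cassert>
#include <cstdint>

// f32 -> u16 when only the wider conversion is available: a signed
// conversion to i32 is enough, since all of u16 fits in i32, and the
// truncation recovers the original type.
static uint16_t fpToU16ViaI32(float F) {
  int32_t Wide = (int32_t)F; // FP_TO_SINT on the promoted type
  return (uint16_t)Wide;     // TRUNCATE back to the original type
}

int main() {
  assert(fpToU16ViaI32(65535.0f) == 65535);
  assert(fpToU16ViaI32(12.75f) == 12);
  return 0;
}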
- - // For sub-byte element sizes, this ends up with 0 stride between elements, - // so the same element just gets re-written to the same location. There seem - // to be tests explicitly testing for this broken behavior. - - LLVMContext &Ctx = *DAG.getContext(); - - EVT NewMemVT - = EVT::getVectorVT(Ctx, - MemSclVT.getIntegerVT(Ctx, NextPowerOf2(ScalarSize)), - StVT.getVectorNumElements()); - - SDValue NewVectorStore = DAG.getTruncStore( - ST->getChain(), SDLoc(Op), ST->getValue(), ST->getBasePtr(), - ST->getPointerInfo(), NewMemVT, ST->getAlignment(), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); - ST = cast<StoreSDNode>(NewVectorStore.getNode()); - } - SDValue TF = TLI.scalarizeVectorStore(ST, DAG); AddLegalizedOperand(Op, TF); return TF; @@ -743,6 +735,24 @@ SDValue VectorLegalizer::Expand(SDValue Op) { return ExpandCTLZ(Op); case ISD::CTTZ_ZERO_UNDEF: return ExpandCTTZ_ZERO_UNDEF(Op); + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + return ExpandStrictFPOp(Op); default: return DAG.UnrollVectorOp(Op.getNode()); } @@ -1036,7 +1046,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); // Two to the power of half-word-size. - SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType()); + SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, Op.getValueType()); // Clear upper part of LO, lower HI SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); @@ -1129,6 +1139,53 @@ SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); } +SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) { + EVT VT = Op.getValueType(); + EVT EltVT = VT.getVectorElementType(); + unsigned NumElems = VT.getVectorNumElements(); + unsigned NumOpers = Op.getNumOperands(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT ValueVTs[] = {EltVT, MVT::Other}; + SDValue Chain = Op.getOperand(0); + SDLoc dl(Op); + + SmallVector<SDValue, 32> OpValues; + SmallVector<SDValue, 32> OpChains; + for (unsigned i = 0; i < NumElems; ++i) { + SmallVector<SDValue, 4> Opers; + SDValue Idx = DAG.getConstant(i, dl, + TLI.getVectorIdxTy(DAG.getDataLayout())); + + // The Chain is the first operand. + Opers.push_back(Chain); + + // Now process the remaining operands.
+ for (unsigned j = 1; j < NumOpers; ++j) { + SDValue Oper = Op.getOperand(j); + EVT OperVT = Oper.getValueType(); + + if (OperVT.isVector()) + Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Oper, Idx); + + Opers.push_back(Oper); + } + + SDValue ScalarOp = DAG.getNode(Op->getOpcode(), dl, ValueVTs, Opers); + + OpValues.push_back(ScalarOp.getValue(0)); + OpChains.push_back(ScalarOp.getValue(1)); + } + + SDValue Result = DAG.getBuildVector(VT, dl, OpValues); + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); + + AddLegalizedOperand(Op.getValue(0), Result); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return NewChain; +} + SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT VT = Op.getValueType(); unsigned NumElems = VT.getVectorNumElements(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index ce1c01b621f0..1cd43ace48f3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -33,9 +33,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue R = SDValue(); switch (N->getOpcode()) { @@ -169,9 +168,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, } SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { + SDValue Op = N->getOperand(0); + if (Op.getValueType().isVector() + && Op.getValueType().getVectorNumElements() == 1 + && !isSimpleLegalType(Op.getValueType())) + Op = GetScalarizedVector(Op); EVT NewVT = N->getValueType(0).getVectorElementType(); return DAG.getNode(ISD::BITCAST, SDLoc(N), - NewVT, N->getOperand(0)); + NewVT, Op); } SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { @@ -338,8 +342,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { ScalarBool = TargetLowering::UndefinedBooleanContent; } + EVT CondVT = Cond.getValueType(); if (ScalarBool != VecBool) { - EVT CondVT = Cond.getValueType(); switch (ScalarBool) { case TargetLowering::UndefinedBooleanContent: break; @@ -360,6 +364,11 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { } } + // Truncate the condition if needed + auto BoolVT = getSetCCResultType(CondVT); + if (BoolVT.bitsLT(CondVT)) + Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond); + return DAG.getSelect(SDLoc(N), LHS.getValueType(), Cond, LHS, GetScalarizedVector(N->getOperand(2))); @@ -433,9 +442,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); if (!Res.getNode()) { @@ -515,7 +523,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. 
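Stepping back to ExpandStrictFPOp, completed near the top of the chunk above: it unrolls a chained vector FP operation by giving every scalar op the single incoming chain, rebuilding the vector from the scalar values, and merging the per-lane chains with a TokenFactor. A structural analogue in plain C++ (an integer token models the chain; strictScalarSqrt and the token arithmetic are illustrative stand-ins, not LLVM API):

#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>

// A scalar "strict" op: consumes a chain token and produces a value
// plus its own chain token, ordered after the input chain.
struct ScalarResult { double Value; int Chain; };

static ScalarResult strictScalarSqrt(double X, int ChainIn) {
  return {std::sqrt(X), ChainIn + 1};
}

// Unroll: every lane uses the same incoming chain; the output chain is
// a "token factor" over all per-lane chains (modeled here as the max).
static std::vector<double> expandStrictSqrt(const std::vector<double> &V,
                                            int ChainIn, int &ChainOut) {
  std::vector<double> Values;
  int Merged = ChainIn;
  for (double X : V) {
    ScalarResult R = strictScalarSqrt(X, ChainIn);
    Values.push_back(R.Value);
    Merged = std::max(Merged, R.Chain);
  }
  ChainOut = Merged;
  return Values;
}

int main() {
  int ChainOut = 0;
  std::vector<double> R = expandStrictSqrt({1.0, 4.0, 9.0}, 7, ChainOut);
  assert(R[1] == 2.0 && ChainOut == 8); // ordered after the input chain
  return 0;
}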
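One more note on the ExpandUINT_TO_FLOAT hunk two chunks back: the TWOHW change is a genuine bug fix. With BW == 64, the old expression 1 << (BW / 2) shifts a 32-bit int by 32 bits, which is undefined behavior in C++; the 1ULL literal keeps the shift inside a 64-bit type. A two-line demonstration:

#include <cassert>
#include <cstdint>

int main() {
  unsigned BW = 64;
  // 1 << (BW / 2) would shift the 32-bit int literal 1 by 32 bits:
  // undefined behavior, and never the intended 2^32.
  uint64_t TwoHW = 1ULL << (BW / 2); // well-defined: 2^32
  assert(TwoHW == 0x100000000ULL);
  return 0;
}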
- return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Op); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); } /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. @@ -618,9 +626,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { /// invalid operands or may have other results that need legalization, we just /// know that (at least) one result needs vector splitting. void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Split node result: "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n"); SDValue Lo, Hi; // See if the target wants to custom expand this node. @@ -749,6 +755,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FMA: SplitVecRes_TernaryOp(N, Lo, Hi); break; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + case ISD::STRICT_FPOW: + case ISD::STRICT_FPOWI: + case ISD::STRICT_FSIN: + case ISD::STRICT_FCOS: + case ISD::STRICT_FEXP: + case ISD::STRICT_FEXP2: + case ISD::STRICT_FLOG: + case ISD::STRICT_FLOG10: + case ISD::STRICT_FLOG2: + case ISD::STRICT_FRINT: + case ISD::STRICT_FNEARBYINT: + SplitVecRes_StrictFPOp(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1028,6 +1053,56 @@ void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi); } +void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + unsigned NumOps = N->getNumOperands(); + SDValue Chain = N->getOperand(0); + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + SmallVector<SDValue, 4> OpsLo; + SmallVector<SDValue, 4> OpsHi; + + // The Chain is the first operand. + OpsLo.push_back(Chain); + OpsHi.push_back(Chain); + + // Now process the remaining operands. + for (unsigned i = 1; i < NumOps; ++i) { + SDValue Op = N->getOperand(i); + SDValue OpLo = Op; + SDValue OpHi = Op; + + EVT InVT = Op.getValueType(); + if (InVT.isVector()) { + // If the input also splits, handle it directly for a + // compile time speedup. Otherwise split it by hand. + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) + GetSplitVector(Op, OpLo, OpHi); + else + std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); + } + + OpsLo.push_back(OpLo); + OpsHi.push_back(OpHi); + } + + EVT LoValueVTs[] = {LoVT, MVT::Other}; + EVT HiValueVTs[] = {HiVT, MVT::Other}; + Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); + Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); + + // Build a factor node to remember that this Op is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Lo.getValue(1), Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -1200,16 +1275,16 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, MLD->isExpandingLoad()); + unsigned HiOffset = LoMemVT.getStoreSize(); - MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), - SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad, + HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(), + MLD->getRanges()); Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, ExtType, MLD->isExpandingLoad()); - // Build a factor node to remember that this load is independent of the // other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), @@ -1232,6 +1307,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue Mask = MGT->getMask(); SDValue Src0 = MGT->getValue(); SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); unsigned Alignment = MGT->getOriginalAlignment(); // Split Mask operand @@ -1263,11 +1339,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); - SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); @@ -1365,8 +1441,8 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { - DEBUG(dbgs() << "Split vector extend via incremental extend:"; - N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); // Extend the source vector by one step. SDValue NewSrc = DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); @@ -1501,9 +1577,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, /// the node are known to be legal, but other operands of the node may need /// legalization as well as the specified one. bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Split node operand: "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom split this node. @@ -1683,8 +1757,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { // Use the appropriate scalar instruction on the split subvectors before // reducing the now partially reduced smaller vector. 
- SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi); - return DAG.getNode(N->getOpcode(), dl, ResVT, Partial); + SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags()); + return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags()); } SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { @@ -1810,6 +1884,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Ch = MGT->getChain(); SDValue Ptr = MGT->getBasePtr(); SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); SDValue Mask = MGT->getMask(); SDValue Src0 = MGT->getValue(); unsigned Alignment = MGT->getOriginalAlignment(); @@ -1842,7 +1917,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO); @@ -1852,7 +1927,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, Alignment, MGT->getAAInfo(), MGT->getRanges()); - SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO); @@ -1916,10 +1991,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, N->isCompressingStore()); - MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), - SecondHalfAlignment, N->getAAInfo(), N->getRanges()); + unsigned HiOffset = LoMemVT.getStoreSize(); + + MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, + HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(), + N->getRanges()); Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, N->isTruncatingStore(), N->isCompressingStore()); @@ -1935,6 +2012,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue Ptr = N->getBasePtr(); SDValue Mask = N->getMask(); SDValue Index = N->getIndex(); + SDValue Scale = N->getScale(); SDValue Data = N->getValue(); EVT MemoryVT = N->getMemoryVT(); unsigned Alignment = N->getOriginalAlignment(); @@ -1970,7 +2048,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, MachineMemOperand::MOStore, LoMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO); @@ -1982,7 +2060,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, // The order of the Scatter operation after split is well defined. The "Hi" // part comes after the "Lo". So these two operations should be chained one // after another. 
- SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi}; + SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), DL, OpsHi, MMO); } @@ -2005,6 +2083,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + // Scalarize if the split halves are not byte-sized. + if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) + return TLI.scalarizeVectorStore(N, DAG); + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) @@ -2089,9 +2171,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { return SplitVecOp_UnaryOp(N); SDLoc DL(N); - // Extract the halves of the input via extract_subvector. + // Get the split input vector. SDValue InLoVec, InHiVec; - std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); + GetSplitVector(InVec, InLoVec, InHiVec); // Truncate them to 1/2 the element size. EVT HalfElementVT = IsFloat ? EVT::getFloatingPointVT(InElementSize/2) : @@ -2164,9 +2246,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { - DEBUG(dbgs() << "Widen node result " << ResNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); // See if the target wants to custom widen this node. if (CustomWidenLowerNode(N, N->getValueType(ResNo))) @@ -2948,6 +3029,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { SDValue Mask = N->getMask(); EVT MaskVT = Mask.getValueType(); SDValue Src0 = GetWidenedVector(N->getValue()); + SDValue Scale = N->getScale(); unsigned NumElts = WideVT.getVectorNumElements(); SDLoc dl(N); @@ -2963,7 +3045,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Index.getValueType().getScalarType(), NumElts); Index = ModifyToType(Index, WideIndexVT); - SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index, Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, N->getMemOperand()); @@ -3309,9 +3391,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { // Widen Vector Operand //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { - DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; - N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); SDValue Res = SDValue(); // See if the target wants to custom widen this node. @@ -3374,11 +3455,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = N->getOperand(0); - // If some legalization strategy other than widening is used on the operand, - // we can't safely assume that just extending the low lanes is the correct - // transformation. 
- if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) - return WidenVecOp_Convert(N); + assert(getTypeAction(InOp.getValueType()) == + TargetLowering::TypeWidenVector && + "Unexpected type action"); InOp = GetWidenedVector(InOp); assert(VT.getVectorNumElements() < InOp.getValueType().getVectorNumElements() && @@ -3422,7 +3501,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { // low lanes. switch (N->getOpcode()) { default: - llvm_unreachable("Extend legalization on on extend operation!"); + llvm_unreachable("Extend legalization on extend operation!"); case ISD::ANY_EXTEND: return DAG.getAnyExtendVectorInReg(InOp, DL, VT); case ISD::SIGN_EXTEND: @@ -3440,20 +3519,31 @@ SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { - // Since the result is legal and the input is illegal, it is unlikely that we - // can fix the input to a legal type so unroll the convert into some scalar - // code and create a nasty build vector. + // The result is legal, but the input is illegal. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) - InOp = GetWidenedVector(InOp); + assert(getTypeAction(InOp.getValueType()) == + TargetLowering::TypeWidenVector && + "Unexpected type action"); + InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); + unsigned Opcode = N->getOpcode(); + + // See if a widened result type would be legal; if so, widen the node. + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + InVT.getVectorNumElements()); + if (TLI.isTypeLegal(WideVT)) { + SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, + DAG.getIntPtrConstant(0, dl)); + } + EVT InEltVT = InVT.getVectorElementType(); - unsigned Opcode = N->getOpcode(); + // Unroll the convert into some scalar code and create a nasty build vector. SmallVector<SDValue, 16> Ops(NumElts); for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode( @@ -3506,8 +3596,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { unsigned NumOperands = N->getNumOperands(); for (unsigned i=0; i < NumOperands; ++i) { SDValue InOp = N->getOperand(i); - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) - InOp = GetWidenedVector(InOp); + assert(getTypeAction(InOp.getValueType()) == + TargetLowering::TypeWidenVector && + "Unexpected type action"); + InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode( ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, @@ -3533,6 +3625,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // vector type. StoreSDNode *ST = cast<StoreSDNode>(N); + if (!ST->getMemoryVT().getScalarType().isByteSized()) + return TLI.scalarizeVectorStore(ST, DAG); + SmallVector<SDValue, 16> StChain; if (ST->isTruncatingStore()) GenWidenVectorTruncStores(StChain, ST); @@ -3576,6 +3671,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { SDValue DataOp = MSC->getValue(); SDValue Mask = MSC->getMask(); EVT MaskVT = Mask.getValueType(); + SDValue Scale = MSC->getScale(); // Widen the value.
SDValue WideVal = GetWidenedVector(DataOp); @@ -3595,7 +3691,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { NumElts); Index = ModifyToType(Index, WideIndexVT); - SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; + SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index, + Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), dl, Ops, MSC->getMemOperand()); @@ -3605,6 +3702,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDLoc dl(N); + EVT VT = N->getValueType(0); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real // problem? // Only some of the compared elements are legal. EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), InOp0.getValueType()); + // The result type is legal; if it is vXi1, keep vXi1 for the new SETCC. + if (VT.getScalarType() == MVT::i1) + SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + SVT.getVectorNumElements()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), - SVT, InOp0, InOp1, N->getOperand(2)); + SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), - N->getValueType(0).getVectorNumElements()); + VT.getVectorNumElements()); SDValue CC = DAG.getNode( ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, N->getValueType(0)); + return PromoteTargetBoolean(CC, VT); } diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index cf92907a8b5f..7e6b57426338 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -32,7 +32,8 @@ public: enum DbgValueKind { SDNODE = 0, ///< Value is the result of an expression. CONST = 1, ///< Value is a constant. - FRAMEIX = 2 ///< Value is contents of a stack location. + FRAMEIX = 2, ///< Value is contents of a stack location. + VREG = 3 ///< Value is a virtual register. }; private: union { @@ -42,6 +43,7 @@ private: } s; const Value *Const; ///< Valid for constants. unsigned FrameIx; ///< Valid for stack objects. + unsigned VReg; ///< Valid for registers. } u; DIVariable *Var; DIExpression *Expr; @@ -69,12 +71,18 @@ public: u.Const = C; } - /// Constructor for frame indices. - SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, DebugLoc dl, - unsigned O) - : Var(Var), Expr(Expr), DL(std::move(dl)), Order(O), IsIndirect(false) { - kind = FRAMEIX; - u.FrameIx = FI; + /// Constructor for virtual registers and frame indices. + SDDbgValue(DIVariable *Var, DIExpression *Expr, unsigned VRegOrFrameIdx, + bool IsIndirect, DebugLoc DL, unsigned Order, + enum DbgValueKind Kind) + : Var(Var), Expr(Expr), DL(DL), Order(Order), IsIndirect(IsIndirect) { + assert((Kind == VREG || Kind == FRAMEIX) && + "Invalid SDDbgValue constructor"); + kind = Kind; + if (kind == VREG) + u.VReg = VRegOrFrameIdx; + else + u.FrameIx = VRegOrFrameIdx; } /// Returns the kind.
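[Editor's note] The new SDDbgValue constructor above folds virtual registers and frame indices into a single unsigned payload whose meaning is selected by the Kind tag, with asserting accessors guarding each read. Below is a minimal stand-alone sketch of that tagged-union pattern; the names and simplified types are illustrative, not the actual LLVM class.

#include <cassert>

// Illustrative stand-in for SDDbgValue's kind/payload pairing; only the
// register/frame-index half of the real class is modeled here.
enum DbgValueKind { FRAMEIX, VREG };

class DbgValueSketch {
  union {
    unsigned FrameIx; // Valid only when Kind == FRAMEIX.
    unsigned VReg;    // Valid only when Kind == VREG.
  } U;
  DbgValueKind Kind;

public:
  // One constructor serves both kinds; the tag decides how the payload is
  // stored, just as the VRegOrFrameIdx parameter does above.
  DbgValueSketch(unsigned VRegOrFrameIdx, DbgValueKind K) : Kind(K) {
    if (Kind == VREG)
      U.VReg = VRegOrFrameIdx;
    else
      U.FrameIx = VRegOrFrameIdx;
  }

  // Accessors assert the tag so a mismatched read is caught in debug builds
  // rather than silently reinterpreting the payload.
  unsigned getVReg() const { assert(Kind == VREG); return U.VReg; }
  unsigned getFrameIx() const { assert(Kind == FRAMEIX); return U.FrameIx; }
};

Sharing one field keeps the object the same size as before while still giving each kind a self-documenting accessor.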
@@ -98,6 +106,9 @@ public: /// Returns the FrameIx for a stack object unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } + /// Returns the Virtual Register for a VReg + unsigned getVReg() const { assert (kind==VREG); return u.VReg; } + /// Returns whether this is an indirect value. bool isIndirect() const { return IsIndirect; } @@ -115,6 +126,28 @@ public: bool isInvalidated() const { return Invalid; } }; +/// Holds the information from a dbg_label node through SDISel. +/// We do not use SDValue here to avoid including its header. +class SDDbgLabel { + MDNode *Label; + DebugLoc DL; + unsigned Order; + +public: + SDDbgLabel(MDNode *Label, DebugLoc dl, unsigned O) + : Label(Label), DL(std::move(dl)), Order(O) {} + + /// Returns the MDNode pointer for the label. + MDNode *getLabel() const { return Label; } + + /// Returns the DebugLoc. + DebugLoc getDebugLoc() const { return DL; } + + /// Returns the SDNodeOrder. This is the order of the preceding node in the + /// input. + unsigned getOrder() const { return Order; } +}; + } // end llvm namespace #endif diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 698e14453d1d..3944d7df286d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -13,6 +13,7 @@ #include "InstrEmitter.h" #include "ScheduleDAGSDNodes.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" @@ -115,7 +116,7 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGFast::Schedule() { - DEBUG(dbgs() << "********** List Scheduling **********\n"); + LLVM_DEBUG(dbgs() << "********** List Scheduling **********\n"); NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), nullptr); @@ -124,8 +125,8 @@ void ScheduleDAGFast::Schedule() { // Build the scheduling graph. BuildSchedGraph(nullptr); - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); + LLVM_DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su] + .dumpAll(this)); // Execute the actual scheduling loop. ListScheduleBottomUp(); @@ -180,8 +181,8 @@ void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + LLVM_DEBUG(SU->dump(this)); assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); SU->setHeightToAtLeast(CurCycle); @@ -236,7 +237,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return nullptr; - DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -346,7 +347,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { SU = NewSU; } - DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); NewSU = Clone(SU); // New SUnit has the exact same predecessors. 
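[Editor's note] Most of the mechanical churn in the scheduler files here is the rename of DEBUG to LLVM_DEBUG. The idiom behind both macros is that the wrapped statement vanishes from release builds entirely. A minimal sketch of that idiom follows; SKETCH_DEBUG is an illustrative name, and the real LLVM_DEBUG additionally gates on the -debug/-debug-only flags at run time.

#include <iostream>

// In debug builds the wrapped statement executes; when NDEBUG is defined it
// compiles away, so the stream operators cost nothing in release builds.
#ifndef NDEBUG
#define SKETCH_DEBUG(X) do { X; } while (false)
#else
#define SKETCH_DEBUG(X) do { } while (false)
#endif

int main() {
  unsigned CurCycle = 3, NodeNum = 7;
  SKETCH_DEBUG(std::cout << "*** Scheduling [" << CurCycle << "]: SU #"
                         << NodeNum << '\n');
  return 0;
}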
@@ -592,14 +593,14 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } - DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; @@ -666,8 +667,8 @@ void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { // These nodes do not need to be translated into MIs. return; - DEBUG(dbgs() << "\n*** Scheduling: "); - DEBUG(N->dump(DAG)); + LLVM_DEBUG(dbgs() << "\n*** Scheduling: "); + LLVM_DEBUG(N->dump(DAG)); Sequence.push_back(N); unsigned NumOps = N->getNumOperands(); @@ -713,7 +714,7 @@ static SDNode *findGluedUser(SDNode *N) { } void ScheduleDAGLinearize::Schedule() { - DEBUG(dbgs() << "********** DAG Linearization **********\n"); + LLVM_DEBUG(dbgs() << "********** DAG Linearization **********\n"); SmallVector<SDNode*, 8> Glues; unsigned DAGSize = 0; @@ -763,19 +764,29 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); DenseMap<SDValue, unsigned> VRBaseMap; - DEBUG({ - dbgs() << "\n*** Final schedule ***\n"; - }); + LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); - // FIXME: Handle dbg_values. unsigned NumNodes = Sequence.size(); + MachineBasicBlock *BB = Emitter.getBlock(); for (unsigned i = 0; i != NumNodes; ++i) { SDNode *N = Sequence[NumNodes-i-1]; - DEBUG(N->dump(DAG)); + LLVM_DEBUG(N->dump(DAG)); Emitter.EmitNode(N, false, false, VRBaseMap); + + // Emit any debug values associated with the node. 
+ if (N->getHasDebugValue()) { + MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); + for (auto DV : DAG->GetDbgValues(N)) { + if (DV->isInvalidated()) + continue; + if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap)) + BB->insert(InsertPos, DbgMI); + DV->setIsInvalidated(); + } + } } - DEBUG(dbgs() << '\n'); + LLVM_DEBUG(dbgs() << '\n'); InsertPos = Emitter.getInsertPos(); return Emitter.getBlock(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 49f304c8cc86..43e8ffd3839c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -26,7 +26,6 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -37,6 +36,7 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/InlineAsm.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -46,6 +46,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> @@ -346,8 +347,8 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGRRList::Schedule() { - DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) - << " '" << BB->getName() << "' **********\n"); + LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); CurCycle = 0; IssueCount = 0; @@ -364,8 +365,7 @@ void ScheduleDAGRRList::Schedule() { // Build the scheduling graph. BuildSchedGraph(nullptr); - DEBUG(for (SUnit &SU : SUnits) - SU.dumpAll(this)); + LLVM_DEBUG(for (SUnit &SU : SUnits) SU.dumpAll(this)); Topo.InitDAGTopologicalSorting(); AvailableQueue->initNodes(SUnits); @@ -377,11 +377,11 @@ void ScheduleDAGRRList::Schedule() { AvailableQueue->releaseState(); - DEBUG({ - dbgs() << "*** Final schedule ***\n"; - dumpSchedule(); - dbgs() << '\n'; - }); + LLVM_DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); } //===----------------------------------------------------------------------===// @@ -728,13 +728,13 @@ static void resetVRegCycle(SUnit *SU); /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { - DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); + LLVM_DEBUG(SU->dump(this)); #ifndef NDEBUG if (CurCycle < SU->getHeight()) - DEBUG(dbgs() << " Height [" << SU->getHeight() - << "] pipeline stall!\n"); + LLVM_DEBUG(dbgs() << " Height [" << SU->getHeight() + << "] pipeline stall!\n"); #endif // FIXME: Do not modify node height. It may interfere with @@ -827,8 +827,8 @@ void ScheduleDAGRRList::CapturePred(SDep *PredEdge) { /// UnscheduleNodeBottomUp - Remove the node from the schedule, update its and /// its predecessor states to reflect the change. 
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { - DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); + LLVM_DEBUG(SU->dump(this)); for (SDep &Pred : SU->Preds) { CapturePred(&Pred); @@ -1010,7 +1010,35 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { computeLatency(LoadSU); } - DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); + bool isNewN = true; + SUnit *NewSU; + // This can only happen when isNewLoad is false. + if (N->getNodeId() != -1) { + NewSU = &SUnits[N->getNodeId()]; + // If NewSU has already been scheduled, we need to clone it, but this + // negates the benefit of unfolding, so just return SU. + if (NewSU->isScheduled) + return SU; + isNewN = false; + } else { + NewSU = CreateNewSUnit(N); + N->setNodeId(NewSU->NodeNum); + + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); + computeLatency(NewSU); + } + + LLVM_DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); // Now that we are committed to unfolding, replace DAG uses. for (unsigned i = 0; i != NumVals; ++i) DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals - 1), SDValue(LoadNode, 1)); - SUnit *NewSU = CreateNewSUnit(N); - assert(N->getNodeId() == -1 && "Node already inserted!"); - N->setNodeId(NewSU->NodeNum); - - const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { - if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { - NewSU->isTwoAddress = true; - break; - } - } - if (MCID.isCommutable()) - NewSU->isCommutable = true; - - InitNumRegDefsLeft(NewSU); - computeLatency(NewSU); - // Record all the edges to and from the old SU, by category. SmallVector<SDep, 4> ChainPreds; SmallVector<SDep, 4> ChainSuccs; @@ -1100,7 +1111,8 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { if (isNewLoad) AvailableQueue->addNode(LoadSU); - AvailableQueue->addNode(NewSU); + if (isNewN) + AvailableQueue->addNode(NewSU); ++NumUnfolds; @@ -1117,22 +1129,36 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!N) return nullptr; - if (SU->getNode()->getGluedNode()) + LLVM_DEBUG(dbgs() << "Considering duplicating the SU\n"); + LLVM_DEBUG(SU->dump(this)); + + if (N->getGluedNode() && + !TII->canCopyGluedNodeDuringSchedule(N)) { + LLVM_DEBUG( + dbgs() + << "Giving up because it has incoming glue and the target does not " + "want to copy it\n"); return nullptr; + } SUnit *NewSU; bool TryUnfold = false; for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { MVT VT = N->getSimpleValueType(i); - if (VT == MVT::Glue) + if (VT == MVT::Glue) { + LLVM_DEBUG(dbgs() << "Giving up because it has outgoing glue\n"); return nullptr; - else if (VT == MVT::Other) + } else if (VT == MVT::Other) TryUnfold = true; } for (const SDValue &Op : N->op_values()) { MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); - if (VT == MVT::Glue) + if (VT == MVT::Glue && !TII->canCopyGluedNodeDuringSchedule(N)) { + LLVM_DEBUG( + dbgs() << "Giving up because one of the operands is glue and " + "the target does not want to copy it\n"); return nullptr; + } } // If possible unfold instruction.
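[Editor's note] The reworked TryUnfoldSU above no longer assumes the unfolded node lacks an SUnit: it reuses the existing unit when the node id is already set, bails out when that unit was already scheduled (cloning it would cancel the benefit of unfolding), and only otherwise creates and initializes a fresh unit. A stand-alone sketch of that lookup-or-create shape, with hypothetical names:

#include <cstddef>
#include <vector>

struct Unit {
  int Id;
  bool Scheduled = false;
};

// Returns the reused or freshly created unit, or nullptr when reuse is
// impossible because the existing unit has already been scheduled.
Unit *getOrCreateUnit(int &NodeId, std::vector<Unit> &Units) {
  if (NodeId != -1) { // The node already owns a unit: try to reuse it.
    Unit &Existing = Units[static_cast<std::size_t>(NodeId)];
    return Existing.Scheduled ? nullptr : &Existing;
  }
  // Otherwise create a new unit and record its id on the node.
  NodeId = static_cast<int>(Units.size());
  Units.push_back(Unit{NodeId});
  return &Units.back();
}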
@@ -1147,7 +1173,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return SU; } - DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); NewSU = CreateClone(SU); // New SUnit has the exact same predecessors. @@ -1408,7 +1434,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { // Furthermore, it may have been made available again, in which case it is // now already in the AvailableQueue. if (SU->isAvailable && !SU->NodeQueueId) { - DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); + LLVM_DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); AvailableQueue->push(SU); } if (i < Interferences.size()) @@ -1429,12 +1455,10 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { SmallVector<unsigned, 4> LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - DEBUG(dbgs() << " Interfering reg "; - if (LRegs[0] == TRI->getNumRegs()) - dbgs() << "CallResource"; - else - dbgs() << printReg(LRegs[0], TRI); - dbgs() << " SU #" << CurSU->NodeNum << '\n'); + LLVM_DEBUG(dbgs() << " Interfering reg "; + if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; + else dbgs() << printReg(LRegs[0], TRI); + dbgs() << " SU #" << CurSU->NodeNum << '\n'); std::pair<LRegsMapT::iterator, bool> LRegsPair = LRegsMap.insert(std::make_pair(CurSU, LRegs)); if (LRegsPair.second) { @@ -1480,17 +1504,17 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } - DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" - << TrySU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum + << ") to SU(" << TrySU->NodeNum << ")\n"); AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current // node is no longer available. if (!TrySU->isAvailable || !TrySU->NodeQueueId) { - DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); + LLVM_DEBUG(dbgs() << "TrySU not available; choosing node from queue\n"); CurSU = AvailableQueue->pop(); } else { - DEBUG(dbgs() << "TrySU available\n"); + LLVM_DEBUG(dbgs() << "TrySU available\n"); // Available and in AvailableQueue AvailableQueue->remove(TrySU); CurSU = TrySU; @@ -1534,14 +1558,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // Issue copies, these can be expensive cross register class copies. SmallVector<SUnit*, 2> Copies; InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); - DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum - << " to SU #" << Copies.front()->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); NewDef = Copies.back(); } - DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum - << " to SU #" << TrySU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); LiveRegDefs[Reg] = NewDef; AddPred(NewDef, SDep(TrySU, SDep::Artificial)); TrySU->isAvailable = false; @@ -1569,8 +1593,8 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. 
Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty() || !Interferences.empty()) { - DEBUG(dbgs() << "\nExamining Available:\n"; - AvailableQueue->dump(this)); + LLVM_DEBUG(dbgs() << "\nExamining Available:\n"; + AvailableQueue->dump(this)); // Pick the best node to schedule taking all constraints into // consideration. @@ -2033,8 +2057,8 @@ LLVM_DUMP_METHOD void RegReductionPQBase::dumpRegPressure() const { unsigned Id = RC->getID(); unsigned RP = RegPressure[Id]; if (!RP) continue; - DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " - << RegLimit[Id] << '\n'); + LLVM_DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " + << RegLimit[Id] << '\n'); } } #endif @@ -2186,14 +2210,15 @@ void RegReductionPQBase::scheduledNode(SUnit *SU) { if (RegPressure[RCId] < Cost) { // Register pressure tracking is imprecise. This can happen. But we try // hard not to let it happen because it likely results in poor scheduling. - DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); + LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum + << ") has too many regdefs\n"); RegPressure[RCId] = 0; } else { RegPressure[RCId] -= Cost; } } - DEBUG(dumpRegPressure()); + LLVM_DEBUG(dumpRegPressure()); } void RegReductionPQBase::unscheduledNode(SUnit *SU) { @@ -2273,7 +2298,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { } } - DEBUG(dumpRegPressure()); + LLVM_DEBUG(dumpRegPressure()); } //===----------------------------------------------------------------------===// @@ -2368,7 +2393,7 @@ static void initVRegCycle(SUnit *SU) { if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) return; - DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); SU->isVRegCycle = true; @@ -2406,7 +2431,7 @@ static bool hasVRegCycleUse(const SUnit *SU) { if (Pred.isCtrl()) continue; // ignore chain preds if (Pred.getSUnit()->isVRegCycle && Pred.getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { - DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); return true; } } @@ -2466,9 +2491,9 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LDepth = left->getDepth() - LPenalty; int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { - DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum - << ") depth " << LDepth << " vs SU (" << right->NodeNum - << ") depth " << RDepth << "\n"); + LLVM_DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } if (left->Latency != right->Latency) @@ -2490,9 +2515,9 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { static const char *const PhysRegMsg[] = { " has no physreg", " defines a physreg" }; #endif - DEBUG(dbgs() << " SU (" << left->NodeNum << ") " - << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " - << PhysRegMsg[RHasPhysReg] << "\n"); + LLVM_DEBUG(dbgs() << " SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum + << ") " << PhysRegMsg[RHasPhysReg] << "\n"); return LHasPhysReg < RHasPhysReg; } } @@ -2636,13 +2661,13 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. 
if (LHigh && !RHigh) { - DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" - << right->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { - DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" - << left->NodeNum << ")\n"); + LLVM_DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); return false; } if (!LHigh && !RHigh) { @@ -2704,8 +2729,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { RPDiff = SPQ->RegPressureDiff(right, RLiveUses); } if (!DisableSchedRegPressure && LPDiff != RPDiff) { - DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff - << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); + LLVM_DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum + << "): " << LPDiff << " != SU(" << right->NodeNum + << "): " << RPDiff << "\n"); return LPDiff > RPDiff; } @@ -2717,8 +2743,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { } if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { - DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses - << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); + LLVM_DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses + << "\n"); return LLiveUses < RLiveUses; } @@ -2732,9 +2759,9 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { if (!DisableSchedCriticalPath) { int spread = (int)left->getDepth() - (int)right->getDepth(); if (std::abs(spread) > MaxReorderWindow) { - DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " - << left->getDepth() << " != SU(" << right->NodeNum << "): " - << right->getDepth() << "\n"); + LLVM_DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum + << "): " << right->getDepth() << "\n"); return left->getDepth() < right->getDepth(); } } @@ -2955,9 +2982,10 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // Ok, the transformation is safe and the heuristics suggest it is // profitable. Update the graph. 
- DEBUG(dbgs() << " Prescheduling SU #" << SU.NodeNum - << " next to PredSU #" << PredSU->NodeNum - << " to guide scheduling in the presence of multiple uses\n"); + LLVM_DEBUG( + dbgs() << " Prescheduling SU #" << SU.NodeNum << " next to PredSU #" + << PredSU->NodeNum + << " to guide scheduling in the presence of multiple uses\n"); for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { SDep Edge = PredSU->Succs[i]; assert(!Edge.isAssignedRegDep()); @@ -3046,8 +3074,9 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || (!SU.isCommutable && SuccSU->isCommutable)) && !scheduleDAG->IsReachable(SuccSU, &SU)) { - DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" - << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); + LLVM_DEBUG(dbgs() + << " Adding a pseudo-two-addr edge from SU #" + << SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial)); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index c09b47af26a6..430d8fb34476 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Config/llvm-config.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -243,7 +244,7 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { return; // Sort them in increasing order. - std::sort(Offsets.begin(), Offsets.end()); + llvm::sort(Offsets.begin(), Offsets.end()); // Check if the loads are close enough. SmallVector<SDNode*, 4> Loads; @@ -910,6 +911,39 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { MachineBasicBlock *InsertBB = Emitter.getBlock(); MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); + + SDDbgInfo::DbgLabelIterator DLI = DAG->DbgLabelBegin(); + SDDbgInfo::DbgLabelIterator DLE = DAG->DbgLabelEnd(); + // Now emit the rest according to source order. + LastOrder = 0; + for (const auto &InstrOrder : Orders) { + unsigned Order = InstrOrder.first; + MachineInstr *MI = InstrOrder.second; + if (!MI) + continue; + + // Insert all SDDbgLabel's whose order(s) are before "Order". + for (; DLI != DLE && + (*DLI)->getOrder() >= LastOrder && (*DLI)->getOrder() < Order; + ++DLI) { + MachineInstr *DbgMI = Emitter.EmitDbgLabel(*DLI); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. 
+ MachineBasicBlock::iterator Pos = MI; + MI->getParent()->insert(Pos, DbgMI); + } + } + } + if (DLI == DLE) + break; + + LastOrder = Order; + } } InsertPos = Emitter.getInsertPos(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index a058942c5689..6417e16bd0fd 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -17,10 +17,10 @@ #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/MachineValueType.h" #include <cassert> #include <string> #include <vector> @@ -88,7 +88,7 @@ class InstrItineraryData; /// Clone - Creates a clone of the specified SUnit. It does not copy the /// predecessors / successors info nor the temporary scheduling states. /// - SUnit *Clone(SUnit *N); + SUnit *Clone(SUnit *Old); /// BuildSchedGraph - Build the SUnit graph from the selection dag that we /// are input. This SUnit graph is similar to the SelectionDAG, but diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 07b46b9183ab..84055f8ecc1a 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -93,8 +93,8 @@ private: /// Schedule - Schedule the DAG using list scheduling. void ScheduleDAGVLIW::Schedule() { - DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) - << " '" << BB->getName() << "' **********\n"); + LLVM_DEBUG(dbgs() << "********** List Scheduling " << printMBBReference(*BB) + << " '" << BB->getName() << "' **********\n"); // Build the scheduling graph. BuildSchedGraph(AA); @@ -151,8 +151,8 @@ void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); + LLVM_DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + LLVM_DEBUG(SU->dump(this)); Sequence.push_back(SU); assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); @@ -246,7 +246,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { } else if (!HasNoopHazards) { // Otherwise, we have a pipeline stall, but no other problem, just advance // the current cycle and try again. - DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + LLVM_DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); HazardRec->AdvanceCycle(); ++NumStalls; ++CurCycle; @@ -254,7 +254,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. This is the case for // processors without pipeline interlocks and other cases. 
- DEBUG(dbgs() << "*** Emitting noop\n"); + LLVM_DEBUG(dbgs() << "*** Emitting noop\n"); HazardRec->EmitNoop(); Sequence.push_back(nullptr); // NULL here means noop ++NumNoops; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4c8b63d2f239..48e03c6da68f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -58,6 +57,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Mutex.h" @@ -89,11 +89,16 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} #define DEBUG_TYPE "selectiondag" +static cl::opt<bool> EnableMemCpyDAGOpt("enable-memcpy-dag-opt", + cl::Hidden, cl::init(true), + cl::desc("Gang up loads and stores generated by inlining of memcpy")); + +static cl::opt<int> MaxLdStGlue("ldstmemcpy-glue-max", + cl::desc("Number limit for gluing ld/st of memcpy."), + cl::Hidden, cl::init(0)); + static void NewSDValueDbgMsg(SDValue V, StringRef Msg, SelectionDAG *G) { - DEBUG( - dbgs() << Msg; - V.getNode()->dump(G); - ); + LLVM_DEBUG(dbgs() << Msg; V.getNode()->dump(G);); } //===----------------------------------------------------------------------===// @@ -263,6 +268,52 @@ bool ISD::allOperandsUndef(const SDNode *N) { return true; } +bool ISD::matchUnaryPredicate(SDValue Op, + std::function<bool(ConstantSDNode *)> Match) { + if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) + return Match(Cst); + + if (ISD::BUILD_VECTOR != Op.getOpcode()) + return false; + + EVT SVT = Op.getValueType().getScalarType(); + for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { + auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); + if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) + return false; + } + return true; +} + +bool ISD::matchBinaryPredicate( + SDValue LHS, SDValue RHS, + std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match) { + if (LHS.getValueType() != RHS.getValueType()) + return false; + + if (auto *LHSCst = dyn_cast<ConstantSDNode>(LHS)) + if (auto *RHSCst = dyn_cast<ConstantSDNode>(RHS)) + return Match(LHSCst, RHSCst); + + if (ISD::BUILD_VECTOR != LHS.getOpcode() || + ISD::BUILD_VECTOR != RHS.getOpcode()) + return false; + + EVT SVT = LHS.getValueType().getScalarType(); + for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { + auto *LHSCst = dyn_cast<ConstantSDNode>(LHS.getOperand(i)); + auto *RHSCst = dyn_cast<ConstantSDNode>(RHS.getOperand(i)); + if (!LHSCst || !RHSCst) + return false; + if (LHSCst->getValueType(0) != SVT || + LHSCst->getValueType(0) != RHSCst->getValueType(0)) + return false; + if (!Match(LHSCst, RHSCst)) + return false; + } + return true; +} + ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { switch (ExtType) { case ISD::EXTLOAD: @@ -487,12 +538,41 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::MLOAD: { + const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); + 
ID.AddInteger(MLD->getMemoryVT().getRawBits()); + ID.AddInteger(MLD->getRawSubclassData()); + ID.AddInteger(MLD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MSTORE: { + const MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); + ID.AddInteger(MST->getMemoryVT().getRawBits()); + ID.AddInteger(MST->getRawSubclassData()); + ID.AddInteger(MST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MGATHER: { + const MaskedGatherSDNode *MG = cast<MaskedGatherSDNode>(N); + ID.AddInteger(MG->getMemoryVT().getRawBits()); + ID.AddInteger(MG->getRawSubclassData()); + ID.AddInteger(MG->getPointerInfo().getAddrSpace()); + break; + } + case ISD::MSCATTER: { + const MaskedScatterSDNode *MS = cast<MaskedScatterSDNode>(N); + ID.AddInteger(MS->getMemoryVT().getRawBits()); + ID.AddInteger(MS->getRawSubclassData()); + ID.AddInteger(MS->getPointerInfo().getAddrSpace()); + break; + } case ISD::ATOMIC_CMP_SWAP: case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_CLR: case ISD::ATOMIC_LOAD_OR: case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_NAND: @@ -726,7 +806,7 @@ static void VerifySDNode(SDNode *N) { } #endif // NDEBUG -/// \brief Insert a newly allocated node into the DAG. +/// Insert a newly allocated node into the DAG. /// /// Handles insertion into the all nodes list and CSE map, as well as /// verification and other common operations when a new node is allocated. @@ -903,13 +983,16 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) void SelectionDAG::init(MachineFunction &NewMF, OptimizationRemarkEmitter &NewORE, - Pass *PassPtr) { + Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, + DivergenceAnalysis * Divergence) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; TLI = getSubtarget().getTargetLowering(); TSI = getSubtarget().getSelectionDAGInfo(); + LibInfo = LibraryInfo; Context = &MF->getFunction().getContext(); + DA = Divergence; } SelectionDAG::~SelectionDAG() { @@ -1077,21 +1160,25 @@ SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { } SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { - EVT EltVT = VT.getScalarType(); - SDValue TrueValue; - switch (TLI->getBooleanContents(VT)) { - case TargetLowering::ZeroOrOneBooleanContent: - case TargetLowering::UndefinedBooleanContent: - TrueValue = getConstant(1, DL, VT); - break; - case TargetLowering::ZeroOrNegativeOneBooleanContent: - TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, - VT); - break; - } + SDValue TrueValue = getBoolConstant(true, DL, VT, VT); return getNode(ISD::XOR, DL, VT, Val, TrueValue); } +SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, + EVT OpVT) { + if (!V) + return getConstant(0, DL, VT); + + switch (TLI->getBooleanContents(OpVT)) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + return getConstant(1, DL, VT); + case TargetLowering::ZeroOrNegativeOneBooleanContent: + return getAllOnesConstant(DL, VT); + } + llvm_unreachable("Unexpected boolean content enum!"); +} + SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isT, bool isO) { EVT EltVT = VT.getScalarType(); @@ -1184,7 +1271,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, return SDValue(N, 0); if (!N) { - N = newSDNode<ConstantSDNode>(isT, isO, Elt, DL.getDebugLoc(), EltVT); + N = 
newSDNode<ConstantSDNode>(isT, isO, Elt, EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); NewSDValueDbgMsg(SDValue(N, 0), "Creating constant: ", this); @@ -1227,7 +1314,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, return SDValue(N, 0); if (!N) { - N = newSDNode<ConstantFPSDNode>(isTarget, &V, DL.getDebugLoc(), EltVT); + N = newSDNode<ConstantFPSDNode>(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); InsertNode(N); } @@ -1503,33 +1590,35 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, if (N1.isUndef()) commuteShuffle(N1, N2, MaskVec); - // If shuffling a splat, try to blend the splat instead. We do this here so - // that even when this arises during lowering we don't have to re-handle it. - auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { - BitVector UndefElements; - SDValue Splat = BV->getSplatValue(&UndefElements); - if (!Splat) - return; + if (TLI->hasVectorBlend()) { + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; - for (int i = 0; i < NElts; ++i) { - if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) - continue; + for (int i = 0; i < NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + NElts)) + continue; - // If this input comes from undef, mark it as such. - if (UndefElements[MaskVec[i] - Offset]) { - MaskVec[i] = -1; - continue; - } + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } - // If we can blend a non-undef lane, use that instead. - if (!UndefElements[i]) - MaskVec[i] = i + Offset; - } - }; - if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) - BlendSplat(N1BV, 0); - if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) - BlendSplat(N2BV, NElts); + // If we can blend a non-undef lane, use that instead. + if (!UndefElements[i]) + MaskVec[i] = i + Offset; + } + }; + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + BlendSplat(N1BV, 0); + if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) + BlendSplat(N2BV, NElts); + } // Canonicalize all index into lhs, -> shuffle lhs, undef // Canonicalize all index into rhs, -> shuffle rhs, undef @@ -1643,7 +1732,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, } SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { - MVT VT = SV.getSimpleValueType(0); + EVT VT = SV.getValueType(0); SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); ShuffleVectorSDNode::commuteMask(MaskVec); @@ -1661,6 +1750,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(E, 0); auto *N = newSDNode<RegisterSDNode>(RegNo, VT); + N->SDNodeBits.IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); CSEMap.InsertNode(N, IP); InsertNode(N); return SDValue(N, 0); @@ -1870,19 +1960,15 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, ISD::CondCode Cond, const SDLoc &dl) { + EVT OpVT = N1.getValueType(); + // These setcc operations always fold. 
switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return getConstant(0, dl, VT); + case ISD::SETFALSE2: return getBoolConstant(false, dl, VT, OpVT); case ISD::SETTRUE: - case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = - TLI->getBooleanContents(N1->getValueType(0)); - return getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, - VT); - } + case ISD::SETTRUE2: return getBoolConstant(true, dl, VT, OpVT); case ISD::SETOEQ: case ISD::SETOGT: @@ -1905,16 +1991,16 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); - case ISD::SETEQ: return getConstant(C1 == C2, dl, VT); - case ISD::SETNE: return getConstant(C1 != C2, dl, VT); - case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); - case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); - case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); - case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); - case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT); - case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT); - case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT); - case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT); + case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT); + case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT); + case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); + case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); + case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); + case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); + case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT); + case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); + case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT); + case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT); } } } @@ -1926,41 +2012,54 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); + case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, + OpVT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpLessThan, dl, VT); + case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, dl, VT, + OpVT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); + case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, + OpVT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); + case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, + VT, OpVT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || - R==APFloat::cmpEqual, dl, VT); + case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, dl, VT, + OpVT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); LLVM_FALLTHROUGH; - case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || - 
R==APFloat::cmpEqual, dl, VT); - case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); - case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT); - case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpEqual, dl, VT); - case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); - case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || - R==APFloat::cmpLessThan, dl, VT); - case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || - R==APFloat::cmpUnordered, dl, VT); - case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); - case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); + case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, dl, VT, OpVT); + case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT, + OpVT); + case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, dl, VT, + OpVT); + case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, dl, VT, + OpVT); + case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl, + VT, OpVT); + case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT, + OpVT); } } else { // Ensure that the constant occurs on the RHS. @@ -2297,10 +2396,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; } - // Support big-endian targets when it becomes useful. bool IsLE = getDataLayout().isLittleEndian(); - if (!IsLE) - break; // Bitcast 'small element' vector to 'large element' scalar/vector. if ((BitWidth % SubBitWidth) == 0) { @@ -2319,8 +2415,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, for (unsigned i = 0; i != SubScale; ++i) { computeKnownBits(N0, Known2, SubDemandedElts.shl(i), Depth + 1); - Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * i); - Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * i); + unsigned Shifts = IsLE ? i : SubScale - 1 - i; + Known.One |= Known2.One.zext(BitWidth).shl(SubBitWidth * Shifts); + Known.Zero |= Known2.Zero.zext(BitWidth).shl(SubBitWidth * Shifts); } } @@ -2342,7 +2439,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { - unsigned Offset = (i % SubScale) * BitWidth; + unsigned Shifts = IsLE ? i : NumElts - 1 - i; + unsigned Offset = (Shifts % SubScale) * BitWidth; Known.One &= Known2.One.lshr(Offset).trunc(BitWidth); Known.Zero &= Known2.Zero.lshr(Offset).trunc(BitWidth); // If we don't know any bits, early out. @@ -2441,6 +2539,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, break; case ISD::SMULO: case ISD::UMULO: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. @@ -2904,11 +3003,38 @@ void SelectionDAG::computeKnownBits(SDValue Op, KnownBits &Known, } case ISD::SMIN: case ISD::SMAX: { - computeKnownBits(Op.getOperand(0), Known, DemandedElts, - Depth + 1); - // If we don't know any bits, early out. 
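As an illustration of the FoldSetCC change above: folding through getBoolConstant makes a folded true value agree with what the target defines as true for booleans of the compared type. A minimal standalone model of that mapping, with an enum assumed to mirror TargetLowering::BooleanContent:

#include <cstdint>
#include <cstdio>

// Assumed mirror of TargetLowering::BooleanContent for this sketch.
enum BooleanContent {
  UndefinedBooleanContent,         // Only bit 0 is meaningful.
  ZeroOrOneBooleanContent,         // True is exactly 1.
  ZeroOrNegativeOneBooleanContent  // True is all-ones (-1).
};

// Model of the value getBoolConstant materializes for a folded setcc.
static int64_t boolConstant(bool V, BooleanContent BC) {
  if (!V)
    return 0;
  return BC == ZeroOrNegativeOneBooleanContent ? -1 : 1;
}

int main() {
  std::printf("%lld\n", (long long)boolConstant(true, ZeroOrOneBooleanContent));         // 1
  std::printf("%lld\n", (long long)boolConstant(true, ZeroOrNegativeOneBooleanContent)); // -1
}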
- if (Known.isUnknown()) - break; + // If we have a clamp pattern, we know that the number of sign bits will be + // the minimum of the clamp min/max range. + bool IsMax = (Opcode == ISD::SMAX); + ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; + if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) + CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), + DemandedElts); + if (CstLow && CstHigh) { + if (!IsMax) + std::swap(CstLow, CstHigh); + + const APInt &ValueLow = CstLow->getAPIntValue(); + const APInt &ValueHigh = CstHigh->getAPIntValue(); + if (ValueLow.sle(ValueHigh)) { + unsigned LowSignBits = ValueLow.getNumSignBits(); + unsigned HighSignBits = ValueHigh.getNumSignBits(); + unsigned MinSignBits = std::min(LowSignBits, HighSignBits); + if (ValueLow.isNegative() && ValueHigh.isNegative()) { + Known.One.setHighBits(MinSignBits); + break; + } + if (ValueLow.isNonNegative() && ValueHigh.isNonNegative()) { + Known.Zero.setHighBits(MinSignBits); + break; + } + } + } + + // Fallback - just get the shared known bits of the operands. + computeKnownBits(Op.getOperand(0), Known, DemandedElts, Depth + 1); + if (Known.isUnknown()) break; // Early-out computeKnownBits(Op.getOperand(1), Known2, DemandedElts, Depth + 1); Known.Zero &= Known2.Zero; Known.One &= Known2.One; @@ -3038,7 +3164,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (!DemandedElts) return 1; // No demanded elts, better to assume we don't know anything. - switch (Op.getOpcode()) { + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { default: break; case ISD::AssertSext: Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); @@ -3189,7 +3316,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::min(Tmp, Tmp2); case ISD::SMIN: - case ISD::SMAX: + case ISD::SMAX: { + // If we have a clamp pattern, we know that the number of sign bits will be + // the minimum of the clamp min/max range. + bool IsMax = (Opcode == ISD::SMAX); + ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr; + if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts))) + if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX)) + CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1), + DemandedElts); + if (CstLow && CstHigh) { + if (!IsMax) + std::swap(CstLow, CstHigh); + if (CstLow->getAPIntValue().sle(CstHigh->getAPIntValue())) { + Tmp = CstLow->getAPIntValue().getNumSignBits(); + Tmp2 = CstHigh->getAPIntValue().getNumSignBits(); + return std::min(Tmp, Tmp2); + } + } + + // Fallback - just get the minimum number of sign bits of the operands. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp == 1) + return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + return std::min(Tmp, Tmp2); + } case ISD::UMIN: case ISD::UMAX: Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); @@ -3225,7 +3377,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned RotAmt = C->getAPIntValue().urem(VTBits); // Handle rotate right by N like a rotate left by 32-N. 
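The clamp reasoning above can be sanity-checked in isolation: every result of smax(smin(x, Hi), Lo) with Lo <= Hi lies in [Lo, Hi], so it carries at least min(signbits(Lo), signbits(Hi)) sign bits. A standalone sketch with a toy 32-bit stand-in for APInt::getNumSignBits:

#include <cstdint>
#include <cstdio>

// Toy model of APInt::getNumSignBits for i32: the number of leading bits
// equal to the sign bit, counting the sign bit itself.
static unsigned numSignBits(int32_t V) {
  uint32_t U = (uint32_t)V;
  uint32_t Sign = U >> 31;
  unsigned N = 1;
  for (int i = 30; i >= 0 && ((U >> i) & 1) == Sign; --i)
    ++N;
  return N;
}

int main() {
  // smax(smin(x, 127), -128) clamps x into the i8 range.
  int32_t Lo = -128, Hi = 127;
  unsigned LoBits = numSignBits(Lo), HiBits = numSignBits(Hi);
  unsigned Min = LoBits < HiBits ? LoBits : HiBits;
  // Both bounds have 25 sign bits, so the clamped value does too.
  std::printf("signbits(Lo)=%u signbits(Hi)=%u min=%u\n", LoBits, HiBits, Min);
}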
- if (Op.getOpcode() == ISD::ROTR) + if (Opcode == ISD::ROTR) RotAmt = (VTBits - RotAmt) % VTBits; // If we aren't rotating out all of the known-in sign bits, return the @@ -3423,10 +3575,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } // Allow the target to implement this method for its nodes. - if (Op.getOpcode() >= ISD::BUILTIN_OP_END || - Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || - Op.getOpcode() == ISD::INTRINSIC_VOID) { + if (Opcode >= ISD::BUILTIN_OP_END || + Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::INTRINSIC_VOID) { unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); if (NumBits > 1) @@ -3487,17 +3639,33 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { return false; } -bool SelectionDAG::isKnownNeverZero(SDValue Op) const { +bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const { + assert(Op.getValueType().isFloatingPoint() && + "Floating point type expected"); + // If the value is a constant, we can obviously see if it is a zero or not. + // TODO: Add BuildVector support. if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) return !C->isZero(); + return false; +} + +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { + assert(!Op.getValueType().isFloatingPoint() && + "Floating point types unsupported - use isKnownNeverZeroFloat"); + + // If the value is a constant, we can obviously see if it is a zero or not. + if (ISD::matchUnaryPredicate( + Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) + return true; // TODO: Recognize more cases here. switch (Op.getOpcode()) { default: break; case ISD::OR: - if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) - return !C->isNullValue(); + if (isKnownNeverZero(Op.getOperand(1)) || + isKnownNeverZero(Op.getOperand(0))) + return true; break; } @@ -3517,6 +3685,8 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { return false; } +// FIXME: unify with llvm::haveNoCommonBitsSet. +// FIXME: could also handle masked merge pattern (X & ~M) op (Y & M) bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { assert(A.getValueType() == B.getValueType() && "Values must have the same type"); @@ -3841,11 +4011,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, else if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); - // (ext (trunx x)) -> x + // (ext (trunc x)) -> x if (OpOpcode == ISD::TRUNCATE) { SDValue OpOp = Operand.getOperand(0); - if (OpOp.getValueType() == VT) + if (OpOp.getValueType() == VT) { + transferDbgValues(Operand, OpOp); return OpOp; + } } break; case ISD::TRUNCATE: @@ -3921,10 +4093,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FNEG: // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) - // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? 
+ if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), - Operand.getOperand(0), Operand.getNode()->getFlags()); + Operand.getOperand(0), Flags); if (OpOpcode == ISD::FNEG) // --X -> X return Operand.getOperand(0); break; @@ -4314,24 +4486,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::FMUL: case ISD::FDIV: case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) { - if (Opcode == ISD::FADD) { - // x+0 --> x - if (N2CFP && N2CFP->getValueAPF().isZero()) - return N1; - } else if (Opcode == ISD::FSUB) { - // x-0 --> x - if (N2CFP && N2CFP->getValueAPF().isZero()) - return N1; - } else if (Opcode == ISD::FMUL) { - // x*0 --> 0 - if (N2CFP && N2CFP->isZero()) - return N2; - // x*1 --> x - if (N2CFP && N2CFP->isExactlyValue(1.0)) - return N1; - } - } assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); @@ -4448,12 +4602,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } case ISD::EXTRACT_VECTOR_ELT: + assert(VT.getSizeInBits() >= N1.getValueType().getScalarSizeInBits() && + "The result of EXTRACT_VECTOR_ELT must be at least as wide as the \ + element type of the vector."); + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. if (N1.isUndef()) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF - if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is @@ -4635,6 +4793,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } } + // Any FP binop with an undef operand is folded to NaN. This matches the + // behavior of the IR optimizer. + switch (Opcode) { + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (N1.isUndef() || N2.isUndef()) + return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT); + } + // Canonicalize an UNDEF to the RHS, even over a constant. if (N1.isUndef()) { if (TLI->isCommutativeBinOp(Opcode)) { @@ -4644,22 +4814,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: - case ISD::FSUB: - case ISD::FDIV: - case ISD::FREM: - case ISD::SRA: - return N1; // fold op(undef, arg2) -> undef + return getUNDEF(VT); // fold op(undef, arg2) -> undef case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: + case ISD::SRA: case ISD::SRL: case ISD::SHL: - if (!VT.isVector()) - return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 - // For vectors, we can't easily build an all zero vector, just return - // the LHS. 
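The folds deleted above (x+0 -> x, x*0 -> 0, and so on, previously guarded by UnsafeFPMath) and the new nsz gate on -(x-y) -> (y-x) come down to the same point: these identities do not hold under default IEEE semantics. A quick standalone demonstration:

#include <cmath>
#include <cstdio>

int main() {
  // x*0 -> 0 is wrong when x is NaN or infinity.
  double NaN = std::nan("");
  std::printf("NaN * 0.0  = %f\n", NaN * 0.0);        // nan, not 0
  std::printf("-inf * 0.0 = %f\n", -INFINITY * 0.0);  // nan, not 0

  // -(x-y) == (y-x) fails for x == y because of signed zero:
  double X = 0.0, Y = 0.0;
  std::printf("signbit(-(X-Y)) = %d, signbit(Y-X) = %d\n",
              (int)std::signbit(-(X - Y)), (int)std::signbit(Y - X)); // 1, 0
}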
- return N2; + return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 } } } @@ -4681,32 +4844,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::SDIV: case ISD::UREM: case ISD::SREM: - return N2; // fold op(arg1, undef) -> undef - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - case ISD::FDIV: - case ISD::FREM: - if (getTarget().Options.UnsafeFPMath) - return N2; - break; - case ISD::MUL: - case ISD::AND: + case ISD::SRA: case ISD::SRL: case ISD::SHL: - if (!VT.isVector()) - return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 - // For vectors, we can't easily build an all zero vector, just return - // the LHS. - return N1; + return getUNDEF(VT); // fold op(arg1, undef) -> undef + case ISD::MUL: + case ISD::AND: + return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 case ISD::OR: - if (!VT.isVector()) - return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); - // For vectors, we can't easily build an all one vector, just return - // the LHS. - return N1; - case ISD::SRA: - return N1; + return getAllOnesConstant(DL, VT); } } @@ -4739,10 +4885,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, - SDValue N1, SDValue N2, SDValue N3) { + SDValue N1, SDValue N2, SDValue N3, + const SDNodeFlags Flags) { // Perform various simplifications. switch (Opcode) { case ISD::FMA: { + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); + assert(N1.getValueType() == VT && N2.getValueType() == VT && + N3.getValueType() == VT && "FMA types must match!"); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); @@ -4833,10 +4983,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) { + E->intersectFlagsWith(Flags); return SDValue(E, 0); + } N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs); + N->setFlags(Flags); createOperands(N, Ops); CSEMap.InsertNode(N, IP); } else { @@ -5107,6 +5260,31 @@ static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { return MF.getFunction().optForSize(); } +static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, + SmallVector<SDValue, 32> &OutChains, unsigned From, + unsigned To, SmallVector<SDValue, 16> &OutLoadChains, + SmallVector<SDValue, 16> &OutStoreChains) { + assert(OutLoadChains.size() && "Missing loads in memcpy inlining"); + assert(OutStoreChains.size() && "Missing stores in memcpy inlining"); + SmallVector<SDValue, 16> GluedLoadChains; + for (unsigned i = From; i < To; ++i) { + OutChains.push_back(OutLoadChains[i]); + GluedLoadChains.push_back(OutLoadChains[i]); + } + + // Chain for all loads. 
+ SDValue LoadToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + GluedLoadChains); + + for (unsigned i = From; i < To; ++i) { + StoreSDNode *ST = dyn_cast<StoreSDNode>(OutStoreChains[i]); + SDValue NewStore = DAG.getTruncStore(LoadToken, dl, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + OutChains.push_back(NewStore); + } +} + static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, @@ -5171,7 +5349,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachineMemOperand::Flags MMOFlags = isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; - SmallVector<SDValue, 8> OutChains; + SmallVector<SDValue, 16> OutLoadChains; + SmallVector<SDValue, 16> OutStoreChains; + SmallVector<SDValue, 32> OutChains; unsigned NumMemOps = MemOps.size(); uint64_t SrcOff = 0, DstOff = 0; for (unsigned i = 0; i != NumMemOps; ++i) { @@ -5205,11 +5385,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SubSlice.Length = VTSize; } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); - if (Value.getNode()) + if (Value.getNode()) { Store = DAG.getStore(Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + OutChains.push_back(Store); + } } if (!Store.getNode()) { @@ -5231,17 +5413,61 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, MinAlign(SrcAlign, SrcOff), SrcMMOFlags); - OutChains.push_back(Value.getValue(1)); + OutLoadChains.push_back(Value.getValue(1)); + Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + OutStoreChains.push_back(Store); } - OutChains.push_back(Store); SrcOff += VTSize; DstOff += VTSize; Size -= VTSize; } + unsigned GluedLdStLimit = MaxLdStGlue == 0 ? + TLI.getMaxGluedStoresPerMemcpy() : MaxLdStGlue; + unsigned NumLdStInMemcpy = OutStoreChains.size(); + + if (NumLdStInMemcpy) { + // A memcpy of constants may be converted to a memset, in which case + // we won't have loads and stores, but just stores. In the absence of + // loads, there is nothing to gang up. + if ((GluedLdStLimit <= 1) || !EnableMemCpyDAGOpt) { + // If the target does not care, just leave the loads and stores as they are. + for (unsigned i = 0; i < NumLdStInMemcpy; ++i) { + OutChains.push_back(OutLoadChains[i]); + OutChains.push_back(OutStoreChains[i]); + } + } else { + // Ld/St count is less than or equal to the limit set by the target. + if (NumLdStInMemcpy <= GluedLdStLimit) { + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, + NumLdStInMemcpy, OutLoadChains, + OutStoreChains); + } else { + unsigned NumberLdChain = NumLdStInMemcpy / GluedLdStLimit; + unsigned RemainingLdStInMemcpy = NumLdStInMemcpy % GluedLdStLimit; + unsigned GlueIter = 0; + + for (unsigned cnt = 0; cnt < NumberLdChain; ++cnt) { + unsigned IndexFrom = NumLdStInMemcpy - GlueIter - GluedLdStLimit; + unsigned IndexTo = NumLdStInMemcpy - GlueIter; + + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, IndexFrom, IndexTo, + OutLoadChains, OutStoreChains); + GlueIter += GluedLdStLimit; + } + + // Residual ld/st.
+ if (RemainingLdStInMemcpy) { + chainLoadsAndStoresForMemcpy(DAG, dl, OutChains, 0, + RemainingLdStInMemcpy, OutLoadChains, + OutStoreChains); + } + } + } + } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } @@ -5334,7 +5560,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); } -/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// Lower the call to 'memset' intrinsic function into a series of store /// operations. /// /// \param DAG Selection DAG where lowered code is placed. @@ -5518,6 +5744,47 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, @@ -5579,6 +5846,47 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Src, unsigned SrcAlign, + SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Emit a library call. 
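The grouping arithmetic above (full groups of GluedLdStLimit consumed from the tail of the chain lists, then the residue [0, N % limit) from the front) is easy to check in isolation; this standalone sketch reproduces only the index math:

#include <cstdio>

int main() {
  unsigned NumLdSt = 11, Limit = 4;       // 11 load/store pairs, glue 4 at a time.
  unsigned FullGroups = NumLdSt / Limit;  // 2 full groups
  unsigned Residue = NumLdSt % Limit;     // 3 left over
  unsigned GlueIter = 0;
  for (unsigned Cnt = 0; Cnt < FullGroups; ++Cnt) {
    unsigned From = NumLdSt - GlueIter - Limit;
    unsigned To = NumLdSt - GlueIter;
    std::printf("glued group [%u, %u)\n", From, To); // [7,11) then [3,7)
    GlueIter += Limit;
  }
  if (Residue)
    std::printf("residual group [0, %u)\n", Residue); // [0,3)
}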
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Node = Src; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool isTailCall, @@ -5641,6 +5949,46 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, return CallResult.second; } +SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, + SDValue Dst, unsigned DstAlign, + SDValue Value, SDValue Size, Type *SizeTy, + unsigned ElemSz, bool isTailCall, + MachinePointerInfo DstPtrInfo) { + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; + Args.push_back(Entry); + + Entry.Ty = Type::getInt8Ty(*getContext()); + Entry.Node = Value; + Args.push_back(Entry); + + Entry.Ty = SizeTy; + Entry.Node = Size; + Args.push_back(Entry); + + RTLIB::Libcall LibraryCall = + RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElemSz); + if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported element size"); + + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(TLI->getLibcallCallingConv(LibraryCall), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(LibraryCall), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTList, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { @@ -5736,6 +6084,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, assert((Opcode == ISD::ATOMIC_LOAD_ADD || Opcode == ISD::ATOMIC_LOAD_SUB || Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_CLR || Opcode == ISD::ATOMIC_LOAD_OR || Opcode == ISD::ATOMIC_LOAD_XOR || Opcode == ISD::ATOMIC_LOAD_NAND || @@ -6207,7 +6556,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { - assert(Ops.size() == 5 && "Incompatible number of operands"); + assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); @@ -6233,6 +6582,9 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getIndex().getValueType().getVectorNumElements() == N->getValueType(0).getVectorNumElements() && "Vector width mismatch between index and 
data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -6244,7 +6596,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, MachineMemOperand *MMO) { - assert(Ops.size() == 5 && "Incompatible number of operands"); + assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); @@ -6267,6 +6619,9 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, assert(N->getIndex().getValueType().getVectorNumElements() == N->getValue().getValueType().getVectorNumElements() && "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -6558,6 +6913,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { // Now we update the operands. N->OperandList[0].set(Op); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -6586,6 +6942,7 @@ SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { if (N->OperandList[1] != Op2) N->OperandList[1].set(Op2); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -6636,6 +6993,7 @@ UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { if (N->OperandList[i] != Ops[i]) N->OperandList[i].set(Ops[i]); + updateDivergence(N); // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); return N; @@ -7061,11 +7419,24 @@ SDDbgValue *SelectionDAG::getConstantDbgValue(DIVariable *Var, /// FrameIndex SDDbgValue *SelectionDAG::getFrameIndexDbgValue(DIVariable *Var, DIExpression *Expr, unsigned FI, + bool IsIndirect, const DebugLoc &DL, unsigned O) { assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, DL, O); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, FI, IsIndirect, DL, O, SDDbgValue::FRAMEIX); +} + +/// VReg +SDDbgValue *SelectionDAG::getVRegDbgValue(DIVariable *Var, + DIExpression *Expr, + unsigned VReg, bool IsIndirect, + const DebugLoc &DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, VReg, IsIndirect, DL, O, SDDbgValue::VREG); } void SelectionDAG::transferDbgValues(SDValue From, SDValue To, @@ -7155,8 +7526,9 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { DV->isIndirect(), DV->getDebugLoc(), DV->getOrder()); ClonedDVs.push_back(Clone); DV->setIsInvalidated(); - DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); - dbgs() << " into " << *DIExpr << '\n'); + LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; + N0.getNode()->dumprFull(this); + dbgs() << " into " << *DIExpr << '\n'); } } } @@ -7165,6 +7537,14 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { AddDbgValue(Dbg, Dbg->getSDNode(), false); } +/// Creates a SDDbgLabel node. 
+SDDbgLabel *SelectionDAG::getDbgLabel(DILabel *Label, + const DebugLoc &DL, unsigned O) { + assert(cast<DILabel>(Label)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgLabel(Label, DL, O); +} + namespace { /// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node @@ -7227,8 +7607,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { SDUse &Use = UI.getUse(); ++UI; Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7282,6 +7663,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { SDUse &Use = UI.getUse(); ++UI; Use.setNode(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); // Now that we have modified User, add it back to the CSE maps. If it @@ -7326,8 +7709,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { const SDValue &ToOp = To[Use.getResNo()]; ++UI; Use.set(ToOp); + if (To->getNode()->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // Now that we have modified User, add it back to the CSE maps. If it // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User); @@ -7385,8 +7769,9 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ ++UI; Use.set(To); + if (To->isDivergent() != From->isDivergent()) + updateDivergence(User); } while (UI != UE && *UI == User); - // We are iterating over all uses of the From node, so if a use // doesn't use the specific value, no changes are made. 
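For the getAtomicMemcpy/getAtomicMemmove/getAtomicMemset lowerings added above, the element size selects one of the element-unordered-atomic runtime functions, and any other size is a hard error. Below is a standalone stand-in for the RTLIB lookup; the symbol names follow the LangRef for the element-wise atomic memory intrinsics, while the switch itself is only illustrative:

#include <cstdio>
#include <cstdlib>

// Simplified stand-in for RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC.
static const char *atomicMemcpyLibcall(unsigned ElemSz) {
  switch (ElemSz) {
  case 1:  return "__llvm_memcpy_element_unordered_atomic_1";
  case 2:  return "__llvm_memcpy_element_unordered_atomic_2";
  case 4:  return "__llvm_memcpy_element_unordered_atomic_4";
  case 8:  return "__llvm_memcpy_element_unordered_atomic_8";
  case 16: return "__llvm_memcpy_element_unordered_atomic_16";
  default: // Mirrors the report_fatal_error("Unsupported element size").
    std::fprintf(stderr, "Unsupported element size\n");
    std::exit(1);
  }
}

int main() { std::puts(atomicMemcpyLibcall(4)); }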
if (!UserRemovedFromCSEMaps) @@ -7419,6 +7804,72 @@ namespace { } // end anonymous namespace +void SelectionDAG::updateDivergence(SDNode *N) { + if (TLI->isSDNodeAlwaysUniform(N)) + return; + bool IsDivergent = TLI->isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsDivergent |= Op.getNode()->isDivergent(); + } + if (N->SDNodeBits.IsDivergent != IsDivergent) { + N->SDNodeBits.IsDivergent = IsDivergent; + for (auto U : N->uses()) { + updateDivergence(U); + } + } +} + +void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { + DenseMap<SDNode *, unsigned> Degree; + Order.reserve(AllNodes.size()); + for (auto &N : allnodes()) { + unsigned NOps = N.getNumOperands(); + Degree[&N] = NOps; + if (0 == NOps) + Order.push_back(&N); + } + for (std::vector<SDNode *>::iterator I = Order.begin(); + I != Order.end(); ++I) { + SDNode *N = *I; + for (auto U : N->uses()) { + unsigned &UnsortedOps = Degree[U]; + if (0 == --UnsortedOps) + Order.push_back(U); + } + } +} + +void SelectionDAG::VerifyDAGDiverence() { + std::vector<SDNode *> TopoOrder; + CreateTopologicalOrder(TopoOrder); + const TargetLowering &TLI = getTargetLoweringInfo(); + DenseMap<const SDNode *, bool> DivergenceMap; + for (auto &N : allnodes()) { + DivergenceMap[&N] = false; + } + for (auto N : TopoOrder) { + bool IsDivergent = DivergenceMap[N]; + bool IsSDNodeDivergent = TLI.isSDNodeSourceOfDivergence(N, FLI, DA); + for (auto &Op : N->ops()) { + if (Op.Val.getValueType() != MVT::Other) + IsSDNodeDivergent |= DivergenceMap[Op.getNode()]; + } + if (!IsDivergent && IsSDNodeDivergent && !TLI.isSDNodeAlwaysUniform(N)) { + DivergenceMap[N] = true; + } + } + for (auto &N : allnodes()) { + (void)N; + assert(DivergenceMap[&N] == N.isDivergent() && + "Divergence bit inconsistency detected\n"); + } +} + + /// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving /// uses of other values produced by From.getNode() alone. The same value /// may appear in both the From and To list. The Deleted vector is @@ -7450,7 +7901,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, } // Sort the uses, so that all the uses from a given User are together.
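Divergence, as maintained by updateDivergence and checked by VerifyDAGDiverence above, is a forward dataflow property: order nodes operands-before-users, then OR the bits of the non-chain operands into each user. A standalone toy model of that propagation (chains and the isSDNodeAlwaysUniform override are left out for brevity):

#include <cstdio>
#include <vector>

struct Node {
  std::vector<int> Ops;   // Operand node ids.
  std::vector<int> Users; // User node ids.
  bool Source = false;    // Models isSDNodeSourceOfDivergence.
  bool Divergent = false;
};

int main() {
  // n0 (a divergent source, e.g. a per-lane id) and n1 feed n2; n2 feeds n3.
  std::vector<Node> G(4);
  G[0].Source = true;
  G[2].Ops = {0, 1}; G[0].Users = {2}; G[1].Users = {2};
  G[3].Ops = {2};    G[2].Users = {3};

  // Topological order by remaining-operand counts, as in CreateTopologicalOrder.
  std::vector<int> Order;
  std::vector<int> Degree(G.size());
  for (int I = 0; I < (int)G.size(); ++I)
    if ((Degree[I] = (int)G[I].Ops.size()) == 0)
      Order.push_back(I);
  for (int Idx = 0; Idx < (int)Order.size(); ++Idx)
    for (int U : G[Order[Idx]].Users)
      if (--Degree[U] == 0)
        Order.push_back(U);

  // Propagate the divergence bit along that order.
  for (int I : Order) {
    bool D = G[I].Source;
    for (int Op : G[I].Ops)
      D |= G[Op].Divergent;
    G[I].Divergent = D;
  }
  for (int I = 0; I < (int)G.size(); ++I)
    std::printf("n%d divergent=%d\n", I, (int)G[I].Divergent); // 1 0 1 1
}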
- std::sort(Uses.begin(), Uses.end()); + llvm::sort(Uses.begin(), Uses.end()); for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); UseIndex != UseIndexEnd; ) { @@ -7579,6 +8030,10 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { DbgInfo->add(DB, SD, isParameter); } +void SelectionDAG::AddDbgLabel(SDDbgLabel *DB) { + DbgInfo->add(DB); +} + SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, SDValue NewMemOp) { assert(isa<MemSDNode>(NewMemOp.getNode()) && "Expected a memop node"); @@ -7947,11 +8402,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, if (VT.getSizeInBits() / 8 != Bytes) return false; - SDValue Loc = LD->getOperand(1); - SDValue BaseLoc = Base->getOperand(1); - - auto BaseLocDecomp = BaseIndexOffset::match(BaseLoc, *this); - auto LocDecomp = BaseIndexOffset::match(Loc, *this); + auto BaseLocDecomp = BaseIndexOffset::match(Base, *this); + auto LocDecomp = BaseIndexOffset::match(LD, *this); int64_t Offset = 0; if (BaseLocDecomp.equalBaseIndex(LocDecomp, *this, Offset)) @@ -7966,8 +8418,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const GlobalValue *GV; int64_t GVOffset = 0; if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); - KnownBits Known(PtrWidth); + unsigned IdxWidth = getDataLayout().getIndexTypeSizeInBits(GV->getType()); + KnownBits Known(IdxWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; @@ -8201,7 +8653,7 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } -// \brief Returns the SDNode if it is a constant integer BuildVector +// Returns the SDNode if it is a constant integer BuildVector // or constant integer. SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) { if (isa<ConstantSDNode>(N)) @@ -8227,6 +8679,26 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) { return nullptr; } +void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) { + assert(!Node->OperandList && "Node already has operands"); + SDUse *Ops = OperandRecycler.allocate( + ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator); + + bool IsDivergent = false; + for (unsigned I = 0; I != Vals.size(); ++I) { + Ops[I].setUser(Node); + Ops[I].setInitial(Vals[I]); + if (Ops[I].Val.getValueType() != MVT::Other) // Skip Chain. It does not carry divergence. + IsDivergent = IsDivergent || Ops[I].getNode()->isDivergent(); + } + Node->NumOperands = Vals.size(); + Node->OperandList = Ops; + IsDivergent |= TLI->isSDNodeSourceOfDivergence(Node, FLI, DA); + if (!TLI->isSDNodeAlwaysUniform(Node)) + Node->SDNodeBits.IsDivergent = IsDivergent; + checkForCycles(Node); +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index d5980919d03c..c859f16e74fe 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -21,6 +21,9 @@ using namespace llvm; bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, const SelectionDAG &DAG, int64_t &Off) { + // Conservatively fail if either match failed.
+ if (!Base.getNode() || !Other.Base.getNode()) + return false; // Initial Offset difference. Off = Other.Offset - Offset; @@ -72,24 +75,67 @@ bool BaseIndexOffset::equalBaseIndex(BaseIndexOffset &Other, } /// Parses tree in Ptr for base, index, offset addresses. -BaseIndexOffset BaseIndexOffset::match(SDValue Ptr, const SelectionDAG &DAG) { +BaseIndexOffset BaseIndexOffset::match(LSBaseSDNode *N, + const SelectionDAG &DAG) { + SDValue Ptr = N->getBasePtr(); + // (((B + I*M) + c)) + c ... SDValue Base = DAG.getTargetLoweringInfo().unwrapAddress(Ptr); SDValue Index = SDValue(); int64_t Offset = 0; bool IsIndexSignExt = false; + // pre-inc/pre-dec ops are components of EA. + if (N->getAddressingMode() == ISD::PRE_INC) { + if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset())) + Offset += C->getSExtValue(); + else // If unknown, give up now. + return BaseIndexOffset(SDValue(), SDValue(), 0, false); + } else if (N->getAddressingMode() == ISD::PRE_DEC) { + if (auto *C = dyn_cast<ConstantSDNode>(N->getOffset())) + Offset -= C->getSExtValue(); + else // If unknown, give up now. + return BaseIndexOffset(SDValue(), SDValue(), 0, false); + } + // Consume constant adds & ors with appropriate masking. - while (Base->getOpcode() == ISD::ADD || Base->getOpcode() == ISD::OR) { - if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { + while (true) { + switch (Base->getOpcode()) { + case ISD::OR: // Only consider ORs which act as adds. - if (Base->getOpcode() == ISD::OR && - !DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) - break; - Offset += C->getSExtValue(); - Base = Base->getOperand(0); - continue; + if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) + if (DAG.MaskedValueIsZero(Base->getOperand(0), C->getAPIntValue())) { + Offset += C->getSExtValue(); + Base = Base->getOperand(0); + continue; + } + break; + case ISD::ADD: + if (auto *C = dyn_cast<ConstantSDNode>(Base->getOperand(1))) { + Offset += C->getSExtValue(); + Base = Base->getOperand(0); + continue; + } + break; + case ISD::LOAD: + case ISD::STORE: { + auto *LSBase = cast<LSBaseSDNode>(Base.getNode()); + unsigned int IndexResNo = (Base->getOpcode() == ISD::LOAD) ? 1 : 0; + if (LSBase->isIndexed() && Base.getResNo() == IndexResNo) + if (auto *C = dyn_cast<ConstantSDNode>(LSBase->getOffset())) { + auto Off = C->getSExtValue(); + if (LSBase->getAddressingMode() == ISD::PRE_DEC || + LSBase->getAddressingMode() == ISD::POST_DEC) + Offset -= Off; + else + Offset += Off; + Base = LSBase->getBasePtr(); + continue; + } + break; + } } + // If we get here break out of the loop. 
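The decomposition loop above peels constant adds (plus add-like ors and indexed-access offsets) off the pointer into one accumulated offset, and a pre-inc/pre-dec addressing mode contributes its constant up front. A standalone sketch over toy expression nodes, not SDNodes:

#include <cstdint>
#include <cstdio>

// Toy pointer expression: either a named leaf base or LHS + Imm.
struct Expr {
  const Expr *LHS = nullptr; // Non-null means this node is LHS + Imm.
  int64_t Imm = 0;
  const char *Name = "";     // Leaf base name.
};

// Model of the match loop: accumulate constant offsets, walk to the base.
static void decompose(const Expr *Ptr, int64_t PreIncOffset) {
  int64_t Offset = PreIncOffset; // Pre-inc/dec is part of the address.
  while (Ptr->LHS) {
    Offset += Ptr->Imm;
    Ptr = Ptr->LHS;
  }
  std::printf("base=%s offset=%lld\n", Ptr->Name, (long long)Offset);
}

int main() {
  Expr Base{nullptr, 0, "p"};
  Expr A{&Base, 16, ""};             // p + 16
  Expr B{&A, 8, ""};                 // (p + 16) + 8
  decompose(&B, /*PreIncOffset=*/4); // base=p offset=28
}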
break; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 71cb8cb78f6d..1aa8df29af3b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "SelectionDAGBuilder.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -49,7 +50,6 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -102,6 +102,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -777,8 +778,8 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = RegCount[Value]; MVT RegisterVT = IsABIMangled - ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) - : RegVTs[Value]; + ? TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -818,32 +819,15 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. - bool isSExt = true; + bool isSExt; EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) { - isSExt = true; // ASSERT SEXT 1 - FromVT = MVT::i1; - } else if (NumZeroBits >= RegSize - 1) { - isSExt = false; // ASSERT ZEXT 1 - FromVT = MVT::i1; - } else if (NumSignBits > RegSize - 8) { - isSExt = true; // ASSERT SEXT 8 - FromVT = MVT::i8; - } else if (NumZeroBits >= RegSize - 8) { - isSExt = false; // ASSERT ZEXT 8 - FromVT = MVT::i8; - } else if (NumSignBits > RegSize - 16) { - isSExt = true; // ASSERT SEXT 16 - FromVT = MVT::i16; - } else if (NumZeroBits >= RegSize - 16) { - isSExt = false; // ASSERT ZEXT 16 - FromVT = MVT::i16; - } else if (NumSignBits > RegSize - 32) { - isSExt = true; // ASSERT SEXT 32 - FromVT = MVT::i32; - } else if (NumZeroBits >= RegSize - 32) { - isSExt = false; // ASSERT ZEXT 32 - FromVT = MVT::i32; + if (NumZeroBits) { + FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits); + isSExt = false; + } else if (NumSignBits > 1) { + FromVT = + EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1); + isSExt = true; } else { continue; } @@ -876,8 +860,8 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, unsigned NumParts = RegCount[Value]; MVT RegisterVT = IsABIMangled - ? TLI.getRegisterTypeForCallingConv(RegVTs[Value]) - : RegVTs[Value]; + ? 
TLI.getRegisterTypeForCallingConv(*DAG.getContext(), RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -970,6 +954,20 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, } } +SmallVector<std::pair<unsigned, unsigned>, 4> +RegsForValue::getRegsAndSizes() const { + SmallVector<std::pair<unsigned, unsigned>, 4> OutVec; + unsigned I = 0; + for (auto CountAndVT : zip_first(RegCount, RegVTs)) { + unsigned RegCount = std::get<0>(CountAndVT); + MVT RegisterVT = std::get<1>(CountAndVT); + unsigned RegisterSize = RegisterVT.getSizeInBits(); + for (unsigned E = I + RegCount; I != E; ++I) + OutVec.push_back(std::make_pair(Regs[I], RegisterSize)); + } + return OutVec; +} + void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, const TargetLibraryInfo *li) { AA = aa; @@ -1054,6 +1052,22 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); + if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { + // Propagate the fast-math-flags of this IR instruction to the DAG node that + // maps to this instruction. + // TODO: We could handle all flags (nsw, etc) here. + // TODO: If an IR instruction maps to >1 node, only the final node will have + // flags set. + if (SDNode *Node = getNodeForIRValue(&I)) { + SDNodeFlags IncomingFlags; + IncomingFlags.copyFMF(*FPMO); + if (!Node->getFlags().isDefined()) + Node->setFlags(IncomingFlags); + else + Node->intersectFlagsWith(IncomingFlags); + } + } + if (!isa<TerminatorInst>(&I) && !HasTailCall && !isStatepoint(&I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); @@ -1077,14 +1091,39 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, + const DIExpression *Expr) { + auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { + const DbgValueInst *DI = DDI.getDI(); + DIVariable *DanglingVariable = DI->getVariable(); + DIExpression *DanglingExpr = DI->getExpression(); + if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) { + LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n"); + return true; + } + return false; + }; + + for (auto &DDIMI : DanglingDebugInfoMap) { + DanglingDebugInfoVector &DDIV = DDIMI.second; + DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end()); + } +} + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. 
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, SDValue Val) { - DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; - if (DDI.getDI()) { + auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V); + if (DanglingDbgInfoIt == DanglingDebugInfoMap.end()) + return; + + DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second; + for (auto &DDI : DDIV) { const DbgValueInst *DI = DDI.getDI(); + assert(DI && "Ill-formed DanglingDebugInfo"); DebugLoc dl = DDI.getdl(); + unsigned ValSDNodeOrder = Val.getNode()->getIROrder(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); DILocalVariable *Variable = DI->getVariable(); DIExpression *Expr = DI->getExpression(); @@ -1093,13 +1132,26 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, SDDbgValue *SDV; if (Val.getNode()) { if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) { - SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder); + LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order=" + << DbgSDNodeOrder << "] for:\n " << *DI << "\n"); + LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump()); + // Increase the SDNodeOrder for the DbgValue here to make sure it is + // inserted after the definition of Val when emitting the instructions + // after ISel. An alternative could be to teach + // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly. + LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs() + << "changing SDNodeOrder from " << DbgSDNodeOrder << " to " + << ValSDNodeOrder << "\n"); + SDV = getDbgValue(Val, Variable, Expr, dl, + std::max(DbgSDNodeOrder, ValSDNodeOrder)); DAG.AddDbgValue(SDV, Val.getNode(), false); - } + } else + LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI + << "in EmitFuncArgumentDbgValue\n"); } else - DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); - DanglingDebugInfoMap[V] = DanglingDebugInfo(); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } + DDIV.clear(); } /// getCopyFromRegs - If there was virtual register allocated for the value V @@ -1315,12 +1367,18 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + bool IsSEH = isAsynchronousEHPersonality(Pers); + bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX; MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; + if (!IsSEH) + CatchPadMBB->setIsEHScopeEntry(); // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) CatchPadMBB->setIsEHFuncletEntry(); - - DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); + // Wasm does not need catchpads anymore + if (!IsWasmCXX) + DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, + getControlRoot())); } void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { @@ -1363,7 +1421,8 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { // Don't emit any special code for the cleanuppad instruction. It just marks - // the start of a funclet. + // the start of an EH scope/funclet. 
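The SDNodeOrder adjustment in resolveDanglingDebugInfo above is just a max: the resolved DBG_VALUE must not be emitted before the node that defines the value it describes. The rule in isolation:

#include <algorithm>
#include <cstdio>

int main() {
  unsigned DbgSDNodeOrder = 10; // Order recorded when the dbg.value was seen.
  unsigned ValSDNodeOrder = 25; // Order of the value's defining node.
  // Emitting at the later of the two keeps the DBG_VALUE after the def.
  std::printf("emit at order %u\n", std::max(DbgSDNodeOrder, ValSDNodeOrder));
}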
+ FuncInfo.MBB->setIsEHScopeEntry(); FuncInfo.MBB->setIsEHFuncletEntry(); FuncInfo.MBB->setIsCleanupFuncletEntry(); } @@ -1385,6 +1444,7 @@ static void findUnwindDestinations( classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + bool IsSEH = isAsynchronousEHPersonality(Personality); while (EHPadBB) { const Instruction *Pad = EHPadBB->getFirstNonPHI(); @@ -1397,6 +1457,7 @@ static void findUnwindDestinations( // Stop on cleanup pads. Cleanups are always funclet entries for all known // personalities. UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + UnwindDests.back().first->setIsEHScopeEntry(); UnwindDests.back().first->setIsEHFuncletEntry(); break; } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { @@ -1406,6 +1467,8 @@ static void findUnwindDestinations( // For MSVC++ and the CLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) UnwindDests.back().first->setIsEHFuncletEntry(); + if (!IsSEH) + UnwindDests.back().first->setIsEHScopeEntry(); } NewEHPadBB = CatchSwitch->getUnwindDest(); } else { @@ -1653,8 +1716,7 @@ SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, if (!BPI) { // If BPI is not available, set the default probability as 1 / N, where N is // the number of successors. - auto SuccSize = std::max<uint32_t>( - std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); + auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1); return BranchProbability(1, SuccSize); } return BPI->getEdgeProbability(SrcBB, DstBB); @@ -2489,8 +2551,8 @@ void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { assert(CC.Low == CC.High && "Input clusters must be single-case"); #endif - std::sort(Clusters.begin(), Clusters.end(), - [](const CaseCluster &a, const CaseCluster &b) { + llvm::sort(Clusters.begin(), Clusters.end(), + [](const CaseCluster &a, const CaseCluster &b) { return a.Low->getValue().slt(b.Low->getValue()); }); @@ -2551,9 +2613,23 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { } void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { - if (DAG.getTarget().Options.TrapUnreachable) - DAG.setRoot( - DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); + if (!DAG.getTarget().Options.TrapUnreachable) + return; + + // We may be able to ignore unreachable behind a noreturn call. + if (DAG.getTarget().Options.NoTrapAfterNoreturn) { + const BasicBlock &BB = *I.getParent(); + if (&I != &BB.front()) { + BasicBlock::const_iterator PredI = + std::prev(BasicBlock::const_iterator(&I)); + if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { + if (Call->doesNotReturn()) + return; + } + } + } + + DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } void SelectionDAGBuilder::visitFSub(const User &I) { @@ -2597,6 +2673,10 @@ static bool isVectorReductionOp(const User *I) { } unsigned ElemNum = Inst->getType()->getVectorNumElements(); + // Ensure the reduction size is a power of 2. + if (!isPowerOf2_32(ElemNum)) + return false; + unsigned ElemNumToReduce = ElemNum; // Do DFS search on the def-use chain from the given instruction. 
We only @@ -2682,7 +2762,7 @@ static bool isVectorReductionOp(const User *I) { return false; const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); - if (!Val || Val->getZExtValue() != 0) + if (!Val || !Val->isZero()) return false; ReduxExtracted = true; @@ -2693,45 +2773,23 @@ static bool isVectorReductionOp(const User *I) { return ReduxExtracted; } -void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { - SDValue Op1 = getValue(I.getOperand(0)); - SDValue Op2 = getValue(I.getOperand(1)); - - bool nuw = false; - bool nsw = false; - bool exact = false; - bool vec_redux = false; - FastMathFlags FMF; - - if (const OverflowingBinaryOperator *OFBinOp = - dyn_cast<const OverflowingBinaryOperator>(&I)) { - nuw = OFBinOp->hasNoUnsignedWrap(); - nsw = OFBinOp->hasNoSignedWrap(); +void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { + SDNodeFlags Flags; + if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) { + Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap()); + Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap()); + } + if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { + Flags.setExact(ExactOp->isExact()); } - if (const PossiblyExactOperator *ExactOp = - dyn_cast<const PossiblyExactOperator>(&I)) - exact = ExactOp->isExact(); - if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) - FMF = FPOp->getFastMathFlags(); - if (isVectorReductionOp(&I)) { - vec_redux = true; - DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); + Flags.setVectorReduction(true); + LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); } - SDNodeFlags Flags; - Flags.setExact(exact); - Flags.setNoSignedWrap(nsw); - Flags.setNoUnsignedWrap(nuw); - Flags.setVectorReduction(vec_redux); - Flags.setAllowReciprocal(FMF.allowReciprocal()); - Flags.setAllowContract(FMF.allowContract()); - Flags.setNoInfs(FMF.noInfs()); - Flags.setNoNaNs(FMF.noNaNs()); - Flags.setNoSignedZeros(FMF.noSignedZeros()); - Flags.setUnsafeAlgebra(FMF.isFast()); - - SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, Flags); setValue(&I, BinNodeValue); } @@ -2823,13 +2881,12 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { predicate = FCmpInst::Predicate(FC->getPredicate()); SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - ISD::CondCode Condition = getFCmpCondCode(predicate); - // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. - // FIXME: We should propagate the fast-math-flags to the DAG node itself for - // further optimization, but currently FMF is only applicable to binary nodes. 
- if (TM.Options.NoNaNsFPMath) + ISD::CondCode Condition = getFCmpCondCode(predicate); + auto *FPMO = dyn_cast<FPMathOperator>(&I); + if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), I.getType()); setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); @@ -3424,10 +3481,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { - MVT PtrTy = - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); - unsigned PtrSize = PtrTy.getSizeInBits(); - APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType())); + unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); + MVT IdxTy = MVT::getIntegerVT(IdxSize); + APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType())); // If this is a scalar constant or a splat vector of constants, // handle it quickly. @@ -3439,11 +3495,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (CI) { if (CI->isZero()) continue; - APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); LLVMContext &Context = *DAG.getContext(); SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) : - DAG.getConstant(Offs, dl, PtrTy); + DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : + DAG.getConstant(Offs, dl, IdxTy); // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. @@ -3867,7 +3923,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, - SelectionDAGBuilder* SDB) { + SDValue &Scale, SelectionDAGBuilder* SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -3897,6 +3953,10 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) return false; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); @@ -3926,8 +3986,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; + SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction().
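The constant-GEP fold above computes each constant index's contribution as index * sizeof(element), now at the width of the pointer's index type (IdxSize) rather than the full pointer width. A worked standalone example with a made-up element size and index:

#include <cstdint>
#include <cstdio>

int main() {
  // Assume a 32-bit index type and a 12-byte element type.
  int32_t ElementSize = 12;
  int64_t Index = -3;                      // e.g. getelementptr ..., i64 -3
  int32_t IdxSizedIndex = (int32_t)Index;  // sextOrTrunc to IdxSize
  int32_t Offs = ElementSize * IdxSizedIndex;
  std::printf("byte offset = %d\n", Offs); // -36
}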
@@ -3935,10 +3996,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { MachineMemOperand::MOStore, VT.getStoreSize(), Alignment, AAInfo); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, Ops, MMO); DAG.setRoot(Scatter); @@ -3997,10 +4059,8 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, ISD::NON_EXTLOAD, IsExpanding); - if (AddToChain) { - SDValue OutChain = Load.getValue(1); - DAG.setRoot(OutChain); - } + if (AddToChain) + PendingLoads.push_back(Load.getValue(1)); setValue(&I, Load); } @@ -4025,8 +4085,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; + SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); bool ConstantMemory = false; if (UniformBase && AA && AA->pointsToConstantMemory(MemoryLocation( @@ -4044,10 +4105,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { Alignment, AAInfo, Ranges); if (!UniformBase) { - Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } - SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO); @@ -4868,26 +4930,18 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, V->getType(), isABIRegCopy(V)); - unsigned NumRegs = - std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0); - if (NumRegs > 1) { - unsigned I = 0; + if (RFV.occupiesMultipleRegs()) { unsigned Offset = 0; - auto RegisterVT = RFV.RegVTs.begin(); - for (auto RegCount : RFV.RegCount) { - unsigned RegisterSize = (RegisterVT++)->getSizeInBits(); - for (unsigned E = I + RegCount; I != E; ++I) { - // The vregs are guaranteed to be allocated in sequence. 
- Op = MachineOperand::CreateReg(VMI->second + I, false); - auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegisterSize); - if (!FragmentExpr) - continue; - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, *FragmentExpr)); - Offset += RegisterSize; - } + for (auto RegAndSize : RFV.getRegsAndSizes()) { + Op = MachineOperand::CreateReg(RegAndSize.first, false); + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegAndSize.second); + if (!FragmentExpr) + continue; + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, + Op->getReg(), Variable, *FragmentExpr)); + Offset += RegAndSize.second; } return true; } @@ -4901,17 +4955,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - if (Op->isReg()) - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, - Op->getReg(), Variable, Expr)); - else - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) - .add(*Op) - .addImm(0) - .addMetadata(Variable) - .addMetadata(Expr)); + IsIndirect = (Op->isReg()) ? IsIndirect : true; + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + *Op, Variable, Expr)); return true; } @@ -4924,13 +4971,20 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N, unsigned DbgSDNodeOrder) { if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe - // stack slot locations as such instead of as indirectly addressed - // locations. - return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl, - DbgSDNodeOrder); + // stack slot locations. + // + // Consider "int x = 0; int *px = &x;". There are two kinds of interesting + // debug values here after optimization: + // + // dbg.value(i32* %px, !"int *px", !DIExpression()), and + // dbg.value(i32* %px, !"int x", !DIExpression(DW_OP_deref)) + // + // Both describe the direct values of their associated variables. + return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), + /*IsIndirect*/ false, dl, DbgSDNodeOrder); } - return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl, - DbgSDNodeOrder); + return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), + /*IsIndirect*/ false, dl, DbgSDNodeOrder); } // VisualStudio defines setjmp as _setjmp @@ -5000,14 +5054,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::longjmp: return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { + const auto &MCI = cast<MemCpyInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); - if (!Align) - Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. - bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + // @llvm.memcpy defines 0 and 1 to both mean no alignment. 
+ unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1); + unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1); + unsigned Align = MinAlign(DstAlign, SrcAlign); + bool isVol = MCI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, isTC, MachinePointerInfo(I.getArgOperand(0)), @@ -5016,13 +5074,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memset: { + const auto &MSI = cast<MemSetInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); - if (!Align) - Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. - bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + // @llvm.memset defines 0 and 1 to both mean no alignment. + unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); + bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0))); @@ -5030,14 +5088,18 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } case Intrinsic::memmove: { + const auto &MMI = cast<MemMoveInst>(I); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); - unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); - if (!Align) - Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. - bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + // @llvm.memmove defines 0 and 1 to both mean no alignment. + unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1); + unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1); + unsigned Align = MinAlign(DstAlign, SrcAlign); + bool isVol = MMI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memmove DAG + // node. SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); @@ -5050,36 +5112,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); - // Emit a library call. 
- TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Node = Src; - Args.push_back(Entry); - - Entry.Ty = MI.getLength()->getType(); - Entry.Node = Length; - Args.push_back(Entry); - - uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); - RTLIB::Libcall LibraryCall = - RTLIB::getMEMCPY_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) - report_fatal_error("Unsupported element size"); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( - TLI.getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args)); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - DAG.setRoot(CallResult.second); + unsigned DstAlign = MI.getDestAlignment(); + unsigned SrcAlign = MI.getSourceAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, + SrcAlign, Length, LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memmove_element_unordered_atomic: { @@ -5088,36 +5130,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Src = getValue(MI.getRawSource()); SDValue Length = getValue(MI.getLength()); - // Emit a library call. - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Node = Src; - Args.push_back(Entry); - - Entry.Ty = MI.getLength()->getType(); - Entry.Node = Length; - Args.push_back(Entry); - - uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); - RTLIB::Libcall LibraryCall = - RTLIB::getMEMMOVE_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) - report_fatal_error("Unsupported element size"); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( - TLI.getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args)); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - DAG.setRoot(CallResult.second); + unsigned DstAlign = MI.getDestAlignment(); + unsigned SrcAlign = MI.getSourceAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, + SrcAlign, Length, LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest()), + MachinePointerInfo(MI.getRawSource())); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::memset_element_unordered_atomic: { @@ -5126,37 +5148,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Val = getValue(MI.getValue()); SDValue Length = getValue(MI.getLength()); - // Emit a library call. 
- TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); - Entry.Node = Dst; - Args.push_back(Entry); - - Entry.Ty = Type::getInt8Ty(*DAG.getContext()); - Entry.Node = Val; - Args.push_back(Entry); - - Entry.Ty = MI.getLength()->getType(); - Entry.Node = Length; - Args.push_back(Entry); - - uint64_t ElementSizeConstant = MI.getElementSizeInBytes(); - RTLIB::Libcall LibraryCall = - RTLIB::getMEMSET_ELEMENT_UNORDERED_ATOMIC(ElementSizeConstant); - if (LibraryCall == RTLIB::UNKNOWN_LIBCALL) - report_fatal_error("Unsupported element size"); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(sdl).setChain(getRoot()).setLibCallee( - TLI.getLibcallCallingConv(LibraryCall), - Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(TLI.getLibcallName(LibraryCall), - TLI.getPointerTy(DAG.getDataLayout())), - std::move(Args)); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - DAG.setRoot(CallResult.second); + unsigned DstAlign = MI.getDestAlignment(); + Type *LengthTy = MI.getLength()->getType(); + unsigned ElemSz = MI.getElementSizeInBytes(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, + LengthTy, ElemSz, isTC, + MachinePointerInfo(MI.getRawDest())); + updateDAGForMaybeTailCall(MC); return nullptr; } case Intrinsic::dbg_addr: @@ -5164,13 +5163,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgInfoIntrinsic &DI = cast<DbgInfoIntrinsic>(I); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); + dropDanglingDebugInfo(Variable, Expression); assert(Variable && "Missing variable"); // Check if address has undef value. const Value *Address = DI.getVariableLocation(); if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return nullptr; } @@ -5195,10 +5195,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // DBG_VALUE instructions. llvm.dbg.declare is handled as a frame index in // the MachineFunction variable table. if (FI != std::numeric_limits<int>::max()) { - if (Intrinsic == Intrinsic::dbg_addr) - DAG.AddDbgValue(DAG.getFrameIndexDbgValue(Variable, Expression, FI, dl, - SDNodeOrder), - getRoot().getNode(), isParameter); + if (Intrinsic == Intrinsic::dbg_addr) { + SDDbgValue *SDV = DAG.getFrameIndexDbgValue( + Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); + } return nullptr; } @@ -5214,8 +5215,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); if (isParameter && FINode) { // Byval parameter. We have a frame index at this point. - SDV = DAG.getFrameIndexDbgValue(Variable, Expression, - FINode->getIndex(), dl, SDNodeOrder); + SDV = + DAG.getFrameIndexDbgValue(Variable, Expression, FINode->getIndex(), + /*IsIndirect*/ true, dl, SDNodeOrder); } else if (isa<Argument>(Address)) { // Address is an argument, so try to emit its dbg value using // virtual register info from the FuncInfo.ValueMap. 
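The element-unordered-atomic memcpy/memmove/memset cases above no longer spell out the runtime call; they build dedicated DAG nodes (getAtomicMemcpy and friends) and defer the libcall decision to later lowering. The element-size dispatch the removed code performed inline, and which the fallback lowering still needs, is essentially this (a sketch of the RuntimeLibcalls mapping, shown for memcpy; assumes llvm/CodeGen/RuntimeLibcalls.h):

static RTLIB::Libcall pickAtomicMemcpy(uint64_t ElementSizeInBytes) {
  switch (ElementSizeInBytes) {
  case 1:  return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_1;
  case 2:  return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_2;
  case 4:  return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_4;
  case 8:  return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_8;
  case 16: return RTLIB::MEMCPY_ELEMENT_UNORDERED_ATOMIC_16;
  default: return RTLIB::UNKNOWN_LIBCALL; // caller reports a fatal error
  }
}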
@@ -5231,17 +5233,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N)) { - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); } } return nullptr; } + case Intrinsic::dbg_label: { + const DbgLabelInst &DI = cast<DbgLabelInst>(I); + DILabel *Label = DI.getLabel(); + assert(Label && "Missing label"); + + SDDbgLabel *SDV; + SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder); + DAG.AddDbgLabel(SDV); + return nullptr; + } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast<DbgValueInst>(I); assert(DI.getVariable() && "Missing variable"); DILocalVariable *Variable = DI.getVariable(); DIExpression *Expression = DI.getExpression(); + dropDanglingDebugInfo(Variable, Expression); const Value *V = DI.getValue(); if (!V) return nullptr; @@ -5266,16 +5279,64 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return nullptr; } + // PHI nodes have already been selected, so we should know which VReg it + // is assigned to already. + if (isa<PHINode>(V)) { + auto VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + unsigned Reg = VMI->second; + // The PHI node may be split up into several MI PHI nodes (in + // FunctionLoweringInfo::set). + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, + V->getType(), false); + if (RFV.occupiesMultipleRegs()) { + unsigned Offset = 0; + unsigned BitsToDescribe = 0; + if (auto VarSize = Variable->getSizeInBits()) + BitsToDescribe = *VarSize; + if (auto Fragment = Expression->getFragmentInfo()) + BitsToDescribe = Fragment->SizeInBits; + for (auto RegAndSize : RFV.getRegsAndSizes()) { + unsigned RegisterSize = RegAndSize.second; + // Bail out if all bits are described already. + if (Offset >= BitsToDescribe) + break; + unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe) + ? BitsToDescribe - Offset + : RegisterSize; + auto FragmentExpr = DIExpression::createFragmentExpression( + Expression, Offset, FragmentSize); + if (!FragmentExpr) + continue; + SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first, + false, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + Offset += RegisterSize; + } + } else { + SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl, + SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + } + return nullptr; + } + } + + // TODO: When we get here we will either drop the dbg.value completely, or + // we try to move it forward by letting it dangle for a while. So we should + // probably add an extra DbgValue to the DAG here, with a reference to + // "noreg", to indicate that we have lost the debug location for the + // variable. + if (!V->use_empty()) { // Do not call getValue(V) yet, as we don't want to generate code. // Remember it for later.
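The new PHI branch in the dbg.value handling above describes one variable spread over several virtual registers by slicing it into fragment expressions, one per register, and stopping once every interesting bit is covered. A standalone model of that loop (a hypothetical helper, not from the patch): with 80 bits left to describe and three 32-bit registers it yields fragments (0,32), (32,32) and (64,16).

#include <algorithm>
#include <vector>

struct Fragment { unsigned OffsetInBits, SizeInBits; };

static std::vector<Fragment>
splitIntoFragments(unsigned BitsToDescribe,
                   const std::vector<unsigned> &RegSizesInBits) {
  std::vector<Fragment> Fragments;
  unsigned Offset = 0;
  for (unsigned RegisterSize : RegSizesInBits) {
    if (Offset >= BitsToDescribe)
      break; // all bits are described already
    unsigned Size = std::min(RegisterSize, BitsToDescribe - Offset);
    Fragments.push_back({Offset, Size});
    Offset += RegisterSize; // fragments advance by whole registers
  }
  return Fragments;
}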
- DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); - DanglingDebugInfoMap[V] = DDI; + DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder); return nullptr; } - DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); - DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); return nullptr; } @@ -5609,6 +5670,52 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); return nullptr; } + case Intrinsic::fshl: + case Intrinsic::fshr: { + bool IsFSHL = Intrinsic == Intrinsic::fshl; + SDValue X = getValue(I.getArgOperand(0)); + SDValue Y = getValue(I.getArgOperand(1)); + SDValue Z = getValue(I.getArgOperand(2)); + EVT VT = X.getValueType(); + + // When X == Y, this is rotate. Create the node directly if legal. + // TODO: This should also be done if the operation is custom, but we have + // to make sure targets are handling the modulo shift amount as expected. + // TODO: If the rotate direction (left or right) corresponding to the shift + // is not available, adjust the shift value and invert the direction. + auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR; + if (X == Y && TLI.isOperationLegal(RotateOpcode, VT)) { + setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z)); + return nullptr; + } + + // Get the shift amount and inverse shift amount, modulo the bit-width. + SDValue BitWidthC = DAG.getConstant(VT.getScalarSizeInBits(), sdl, VT); + SDValue ShAmt = DAG.getNode(ISD::UREM, sdl, VT, Z, BitWidthC); + SDValue NegZ = DAG.getNode(ISD::SUB, sdl, VT, BitWidthC, Z); + SDValue InvShAmt = DAG.getNode(ISD::UREM, sdl, VT, NegZ, BitWidthC); + + // fshl: (X << (Z % BW)) | (Y >> ((BW - Z) % BW)) + // fshr: (X << ((BW - Z) % BW)) | (Y >> (Z % BW)) + SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : InvShAmt); + SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, Y, IsFSHL ? InvShAmt : ShAmt); + SDValue Res = DAG.getNode(ISD::OR, sdl, VT, ShX, ShY); + + // If (Z % BW == 0), then (BW - Z) % BW is also zero, so the result would + // be X | Y. If X == Y (rotate), that's fine. If not, we have to select. + if (X != Y) { + SDValue Zero = DAG.getConstant(0, sdl, VT); + EVT CCVT = MVT::i1; + if (VT.isVector()) + CCVT = EVT::getVectorVT(*Context, CCVT, VT.getVectorNumElements()); + // For fshl, 0 shift returns the 1st arg (X). + // For fshr, 0 shift returns the 2nd arg (Y). + SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ); + Res = DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Res); + } + setValue(&I, Res); + return nullptr; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( @@ -5703,7 +5810,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::annotation: case Intrinsic::ptr_annotation: - case Intrinsic::invariant_group_barrier: + case Intrinsic::launder_invariant_group: + case Intrinsic::strip_invariant_group: // Drop the intrinsic, but forward the value setValue(&I, getValue(I.getOperand(0))); return nullptr; @@ -5822,17 +5930,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[5]; unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); auto Flags = rw == 0 ? 
MachineMemOperand::MOLoad :MachineMemOperand::MOStore; - Ops[0] = getRoot(); + Ops[0] = DAG.getRoot(); Ops[1] = getValue(I.getArgOperand(0)); Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), Ops, - EVT::getIntegerVT(*Context, 8), - MachinePointerInfo(I.getArgOperand(0)), - 0, /* align */ - Flags)); + SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, + DAG.getVTList(MVT::Other), Ops, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + Flags); + + // Chain the prefetch in parallel with any pending loads, to stay out of + // the way of later optimizations. + PendingLoads.push_back(Result); + Result = getRoot(); + DAG.setRoot(Result); return nullptr; } case Intrinsic::lifetime_start: @@ -6004,6 +6118,41 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, patchableNode); return nullptr; } + case Intrinsic::xray_typedevent: { + // Here we want to make sure that the intrinsic behaves as if it has a + // specific calling convention, and only for x86_64. + // FIXME: Support other platforms later. + const auto &Triple = DAG.getTarget().getTargetTriple(); + if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux()) + return nullptr; + + SDLoc DL = getCurSDLoc(); + SmallVector<SDValue, 8> Ops; + + // We want to say that we always want the arguments in registers. + // It's unclear to me how manipulating the selection DAG here forces callers + // to provide arguments in registers instead of on the stack. + SDValue LogTypeId = getValue(I.getArgOperand(0)); + SDValue LogEntryVal = getValue(I.getArgOperand(1)); + SDValue StrSizeVal = getValue(I.getArgOperand(2)); + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue Chain = getRoot(); + Ops.push_back(LogTypeId); + Ops.push_back(LogEntryVal); + Ops.push_back(StrSizeVal); + Ops.push_back(Chain); + + // We need to enforce the calling convention for the callsite, so that + // argument ordering is enforced correctly, and that register allocation can + // see that some registers may be assumed clobbered and have to preserve + // them across calls to the intrinsic.
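Stepping back to the prefetch hunk above: queueing the node on PendingLoads instead of making it the DAG root outright is the point of the change. A root update would have serialized every later memory operation behind the prefetch, whereas pending loads are merged into a single TokenFactor only when getRoot() needs a real ordering point. In outline (names as in SelectionDAGBuilder):

// Before: all later chained nodes depend on the prefetch.
//   DAG.setRoot(PrefetchNode);
// After: the prefetch runs in parallel with other outstanding loads and
// is folded into a TokenFactor root on the next getRoot() call.
//   PendingLoads.push_back(PrefetchNode);
//   DAG.setRoot(getRoot()); // flush, as the code above does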
+ MachineSDNode *MN = DAG.getMachineNode( + TargetOpcode::PATCHABLE_TYPED_EVENT_CALL, DL, NodeTys, Ops); + SDValue patchableNode = SDValue(MN, 0); + DAG.setRoot(patchableNode); + setValue(&I, patchableNode); + return nullptr; + } case Intrinsic::experimental_deoptimize: LowerDeoptimizeCall(&I); return nullptr; @@ -6023,6 +6172,66 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::experimental_vector_reduce_fmin: visitVectorReduce(I, Intrinsic); return nullptr; + + case Intrinsic::icall_branch_funnel: { + SmallVector<SDValue, 16> Ops; + Ops.push_back(DAG.getRoot()); + Ops.push_back(getValue(I.getArgOperand(0))); + + int64_t Offset; + auto *Base = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( + I.getArgOperand(1), Offset, DAG.getDataLayout())); + if (!Base) + report_fatal_error( + "llvm.icall.branch.funnel operand must be a GlobalValue"); + Ops.push_back(DAG.getTargetGlobalAddress(Base, getCurSDLoc(), MVT::i64, 0)); + + struct BranchFunnelTarget { + int64_t Offset; + SDValue Target; + }; + SmallVector<BranchFunnelTarget, 8> Targets; + + for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) { + auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( + I.getArgOperand(Op), Offset, DAG.getDataLayout())); + if (ElemBase != Base) + report_fatal_error("all llvm.icall.branch.funnel operands must refer " + "to the same GlobalValue"); + + SDValue Val = getValue(I.getArgOperand(Op + 1)); + auto *GA = dyn_cast<GlobalAddressSDNode>(Val); + if (!GA) + report_fatal_error( + "llvm.icall.branch.funnel operand must be a GlobalValue"); + Targets.push_back({Offset, DAG.getTargetGlobalAddress( + GA->getGlobal(), getCurSDLoc(), + Val.getValueType(), GA->getOffset())}); + } + llvm::sort(Targets.begin(), Targets.end(), + [](const BranchFunnelTarget &T1, const BranchFunnelTarget &T2) { + return T1.Offset < T2.Offset; + }); + + for (auto &T : Targets) { + Ops.push_back(DAG.getTargetConstant(T.Offset, getCurSDLoc(), MVT::i32)); + Ops.push_back(T.Target); + } + + SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL, + getCurSDLoc(), MVT::Other, Ops), + 0); + DAG.setRoot(N); + setValue(&I, N); + HasTailCall = true; + return nullptr; + } + + case Intrinsic::wasm_landingpad_index: { + // TODO store landing pad index in a map, which will be used when generating + // LSDA information + return nullptr; + } } } @@ -6172,7 +6381,10 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. - if (MF.hasEHFunclets()) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + // There is a platform (e.g. wasm) that uses funclet style IR but does not + // actually use outlined funclets and their LSDA info style. + if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { assert(CLI.CS); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), @@ -6630,14 +6842,13 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { const char *RenameFn = nullptr; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { - if (unsigned IID = II->getIntrinsicID(F)) { - RenameFn = visitIntrinsicCall(I, IID); - if (!RenameFn) - return; - } - } - if (Intrinsic::ID IID = F->getIntrinsicID()) { + // Is this an LLVM intrinsic or a target-specific intrinsic? 
+ unsigned IID = F->getIntrinsicID(); + if (!IID) + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) + IID = II->getIntrinsicID(F); + + if (IID) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) return; @@ -6989,27 +7200,37 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, unsigned NumRegs = 1; if (OpInfo.ConstraintVT != MVT::Other) { - // If this is a FP input in an integer register (or vice versa) insert a bit - // cast of the input value. More generally, handle any case where the input - // value disagrees with the register class we plan to stick this in. - if (OpInfo.Type == InlineAsm::isInput && PhysReg.second && + // If this is a FP operand in an integer register (or vice versa), or more + // generally if the operand value disagrees with the register class we plan + // to stick it in, fix the operand type. + // + // If this is an input value, the bitcast to the new type is done now. + // Bitcast for output value is done at the end of visitInlineAsm(). + if ((OpInfo.Type == InlineAsm::isOutput || + OpInfo.Type == InlineAsm::isInput) && + PhysReg.second && !TRI.isTypeLegalForClass(*PhysReg.second, OpInfo.ConstraintVT)) { // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing - // vector types). + // vector types). Note: output bitcast is done at the end of + // visitInlineAsm(). MVT RegVT = *TRI.legalclasstypes_begin(*PhysReg.second); - if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, - RegVT, OpInfo.CallOperand); + if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { + // Exclude indirect inputs while they are unsupported because the code + // to perform the load is missing and thus OpInfo.CallOperand still + // refers to the input address rather than the pointed-to value. + if (OpInfo.Type == InlineAsm::isInput && !OpInfo.isIndirect) + OpInfo.CallOperand = + DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; + // If the operand is a FP value and we want it in integer registers, + // use the corresponding integer type. This turns an f64 value into + // i64, which can be passed with two i32 values on a 32-bit machine. } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { - // If the input is a FP value and we want it in FP registers, do a - // bitcast to the corresponding integer type. This turns an f64 value - // into i64, which can be passed with two i32 values on a 32-bit - // machine. RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, - RegVT, OpInfo.CallOperand); + if (OpInfo.Type == InlineAsm::isInput) + OpInfo.CallOperand = + DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } } @@ -7246,7 +7467,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { continue; // If this is a memory input, and if the operand is not indirect, do what we - // need to to provide an address for the memory input. + // need to provide an address for the memory input.
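The reworked GetRegistersForValue hunk above now fixes type/register-class mismatches for asm outputs as well as inputs. The case the rewritten comment describes is easy to hit: on a hypothetical 32-bit target, constraining a double to a general-purpose register means the operand is bitcast f64 to i64 here and later expanded into two i32 register pieces. A user-level illustration (the asm body is a placeholder; only the constraint matters):

void touch(double D) {
  // "r" requests integer registers for an f64 value; the code above
  // retypes the operand to i64 so register assignment can split it.
  asm volatile("# uses %0" : : "r"(D));
}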
if (OpInfo.ConstraintType == TargetLowering::C_Memory && !OpInfo.isIndirect) { assert((OpInfo.isMultipleAlternative || @@ -7521,12 +7742,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); - // If any of the results of the inline asm is a vector, it may have the - // wrong width/num elts. This can happen for register classes that can - // contain multiple different value types. The preg or vreg allocated may - // not have the same VT as was expected. Convert it to the right type - // with bit_convert. - if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { + // If the type of the inline asm call site return value is different but + // has the same size as the type of the asm output, bitcast it. One example + // of this is for vectors with different width / number of elements. + // This can happen for register classes that can contain multiple + // different value types. The preg or vreg allocated may not have the + // same VT as was expected. + // + // This can also happen for a return value that disagrees with the + // register class it is put in, e.g. a double in a general-purpose + // register on a 32-bit machine. + if (ResultType != Val.getValueType() && + ResultType.getSizeInBits() == Val.getValueSizeInBits()) { Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultType, Val); @@ -7581,8 +7808,17 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, // Make sure we leave the DAG in a valid state const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - auto VT = TLI.getValueType(DAG.getDataLayout(), CS.getType()); - setValue(CS.getInstruction(), DAG.getUNDEF(VT)); + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + + if (ValueVTs.empty()) + return; + + SmallVector<SDValue, 1> Ops; + for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ValueVTs[i])); + + setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc())); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { @@ -7656,7 +7892,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, return DAG.getMergeValues(Ops, SL); } -/// \brief Populate a CallLoweringInfo (into \p CLI) based on the properties of +/// Populate a CallLoweringInfo (into \p CLI) based on the properties of /// the call being lowered. /// /// This is a helper for lowering intrinsics that follow a target calling @@ -7680,7 +7916,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::ArgListEntry Entry; Entry.Node = getValue(V); Entry.Ty = V->getType(); - Entry.setAttributes(&CS, ArgIdx); + Entry.setAttributes(&CS, ArgI); Args.push_back(Entry); } @@ -7691,7 +7927,7 @@ .setIsPatchPoint(IsPatchPoint); } -/// \brief Add a stack map intrinsic call's live variable operands to a stackmap +/// Add a stack map intrinsic call's live variable operands to a stackmap /// or patchpoint target node's operand list. /// /// Constants are converted to TargetConstants purely as an optimization to @@ -7727,7 +7963,7 @@ static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, } } -/// \brief Lower llvm.experimental.stackmap directly to its target opcode. +/// Lower llvm.experimental.stackmap directly to its target opcode.
void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, // [live variables...]) @@ -7790,7 +8026,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { FuncInfo.MF->getFrameInfo().setHasStackMap(); } -/// \brief Lower llvm.experimental.patchpoint directly to its target opcode. +/// Lower llvm.experimental.patchpoint directly to its target opcode. void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, @@ -7954,8 +8190,6 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, FastMathFlags FMF; if (isa<FPMathOperator>(I)) FMF = I.getFastMathFlags(); - SDNodeFlags SDFlags; - SDFlags.setNoNaNs(FMF.noNaNs()); switch (Intrinsic) { case Intrinsic::experimental_vector_reduce_fadd: @@ -7998,10 +8232,10 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, Res = DAG.getNode(ISD::VECREDUCE_UMIN, dl, VT, Op1); break; case Intrinsic::experimental_vector_reduce_fmax: - Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1, SDFlags); + Res = DAG.getNode(ISD::VECREDUCE_FMAX, dl, VT, Op1); break; case Intrinsic::experimental_vector_reduce_fmin: - Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1, SDFlags); + Res = DAG.getNode(ISD::VECREDUCE_FMIN, dl, VT, Op1); break; default: llvm_unreachable("Unhandled vector reduce intrinsic"); @@ -8220,8 +8454,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { else if (Args[i].IsZExt) ExtendKind = ISD::ZERO_EXTEND; - // Conservatively only handle 'returned' on non-vectors for now - if (Args[i].IsReturned && !Op.getValueType().isVector()) { + // Conservatively only handle 'returned' on non-vectors that can be lowered, + // for now. + if (Args[i].IsReturned && !Op.getValueType().isVector() && + CanLowerReturn) { assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && "unexpected use of 'returned'"); // Before passing 'returned' to the target lowering code, ensure that @@ -8500,7 +8736,8 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, continue; } - DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI << '\n'); + LLVM_DEBUG(dbgs() << "Found argument copy elision candidate: " << *AI + << '\n'); // Mark this alloca and store for argument copy elision. *Info = StaticAllocaInfo::Elidable; @@ -8541,8 +8778,9 @@ static void tryToElideArgumentCopy( int OldIndex = AllocaIndex; MachineFrameInfo &MFI = FuncInfo->MF->getFrameInfo(); if (MFI.getObjectSize(FixedIndex) != MFI.getObjectSize(OldIndex)) { - DEBUG(dbgs() << " argument copy elision failed due to bad fixed stack " - "object size\n"); + LLVM_DEBUG( + dbgs() << " argument copy elision failed due to bad fixed stack " + "object size\n"); return; } unsigned RequiredAlignment = AI->getAlignment(); @@ -8551,16 +8789,16 @@ static void tryToElideArgumentCopy( AI->getAllocatedType()); } if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { - DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " - "greater than stack argument alignment (" - << RequiredAlignment << " vs " - << MFI.getObjectAlignment(FixedIndex) << ")\n"); + LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " + "greater than stack argument alignment (" + << RequiredAlignment << " vs " + << MFI.getObjectAlignment(FixedIndex) << ")\n"); return; } // Perform the elision. Delete the old stack object and replace its only use // in the variable info map. 
Mark the stack object as mutable. - DEBUG({ + LLVM_DEBUG({ dbgs() << "Eliding argument copy from " << Arg << " to " << *AI << '\n' << " Replacing frame index " << OldIndex << " with " << FixedIndex << '\n'; @@ -8732,14 +8970,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { "LowerFormalArguments didn't return a valid chain!"); assert(InVals.size() == Ins.size() && "LowerFormalArguments didn't emit the correct number of values!"); - DEBUG({ - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - assert(InVals[i].getNode() && - "LowerFormalArguments emitted a null value!"); - assert(EVT(Ins[i].VT) == InVals[i].getValueType() && - "LowerFormalArguments emitted a value with the wrong type!"); - } - }); + LLVM_DEBUG({ + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + } + }); // Update the DAG with the new chain value resulting from argument lowering. DAG.setRoot(NewRoot); @@ -8940,17 +9178,17 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // At this point we know that there is a 1-1 correspondence between LLVM PHI // nodes and Machine PHI nodes, but the incoming operands have not been // emitted yet. - for (BasicBlock::const_iterator I = SuccBB->begin(); - const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + for (const PHINode &PN : SuccBB->phis()) { // Ignore dead phi's. - if (PN->use_empty()) continue; + if (PN.use_empty()) + continue; // Skip empty types - if (PN->getType()->isEmptyTy()) + if (PN.getType()->isEmptyTy()) continue; unsigned Reg; - const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); if (const Constant *C = dyn_cast<Constant>(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; @@ -8977,7 +9215,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // the input for this MBB. SmallVector<EVT, 4> ValueVTs; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); @@ -9351,7 +9589,7 @@ bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, } BitTestInfo BTI; - std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { + llvm::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { // Sort by probability first, number of bits second, bit mask third. if (a.ExtraProb != b.ExtraProb) return a.ExtraProb > b.ExtraProb; @@ -9550,15 +9788,15 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, // checked first. However, two clusters can have the same probability in // which case their relative ordering is non-deterministic. So we use Low // as a tie-breaker as clusters are guaranteed to never overlap. - std::sort(W.FirstCluster, W.LastCluster + 1, - [](const CaseCluster &a, const CaseCluster &b) { + llvm::sort(W.FirstCluster, W.LastCluster + 1, + [](const CaseCluster &a, const CaseCluster &b) { return a.Prob != b.Prob ? 
a.Prob > b.Prob : a.Low->getValue().slt(b.Low->getValue()); }); // Rearrange the case blocks so that the last one falls through if possible - // without without changing the order of probabilities. + // without changing the order of probabilities. for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) { --I; if (I->Prob > W.LastCluster->Prob) @@ -9883,8 +10121,8 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( if (!SwitchPeeled) return SwitchMBB; - DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " << TopCaseProb - << "\n"); + LLVM_DEBUG(dbgs() << "Peeled one top case in switch stmt, prob: " + << TopCaseProb << "\n"); // Record the MBB for the peeled switch statement. MachineFunction::iterator BBI(SwitchMBB); @@ -9901,10 +10139,11 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( Clusters.erase(PeeledCaseIt); for (CaseCluster &CC : Clusters) { - DEBUG(dbgs() << "Scale the probability for one cluster, before scaling: " - << CC.Prob << "\n"); + LLVM_DEBUG( + dbgs() << "Scale the probability for one cluster, before scaling: " + << CC.Prob << "\n"); CC.Prob = scaleCaseProbality(CC.Prob, TopCaseProb); - DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); + LLVM_DEBUG(dbgs() << "After scaling: " << CC.Prob << "\n"); } PeeledCaseProb = TopCaseProb; return PeeledSwitchMBB; @@ -9983,11 +10222,13 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { findJumpTables(Clusters, &SI, DefaultMBB); findBitTestClusters(Clusters, &SI); - DEBUG({ + LLVM_DEBUG({ dbgs() << "Case clusters: "; for (const CaseCluster &C : Clusters) { - if (C.Kind == CC_JumpTable) dbgs() << "JT:"; - if (C.Kind == CC_BitTests) dbgs() << "BT:"; + if (C.Kind == CC_JumpTable) + dbgs() << "JT:"; + if (C.Kind == CC_BitTests) + dbgs() << "BT:"; C.Low->getValue().print(dbgs(), true); if (C.Low != C.High) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9e7c2bc6821b..e421984b8af2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -21,7 +21,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" @@ -33,6 +32,7 @@ #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CodeGen.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -116,9 +116,12 @@ class SelectionDAGBuilder { unsigned getSDNodeOrder() { return SDNodeOrder; } }; + /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap. + typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector; + /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not /// yet seen the referent. We defer handling these until we do see it. - DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap; + DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap; public: /// PendingLoads - Loads are not emitted to the program immediately. We bunch @@ -671,6 +674,12 @@ public: /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue getCopyFromRegs(const Value *V, Type *Ty); + /// If we have dangling debug info that describes \p Variable, or an + /// overlapping part of the variable considering the \p Expr, then this method + /// will drop that debug info as it isn't valid any longer. + void dropDanglingDebugInfo(const DILocalVariable *Variable, + const DIExpression *Expr); + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, // generate the debug data structures now that we've seen its definition. void resolveDanglingDebugInfo(const Value *V, SDValue Val); @@ -678,6 +687,13 @@ public: SDValue getValue(const Value *V); bool findValue(const Value *V) const; + /// Return the SDNode for the specified IR value if it exists. + SDNode *getNodeForIRValue(const Value *V) { + if (NodeMap.find(V) == NodeMap.end()) + return nullptr; + return NodeMap[V].getNode(); + } + SDValue getNonRegisterValue(const Value *V); SDValue getValueImpl(const Value *V); @@ -696,13 +712,13 @@ public: void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - Instruction::BinaryOps Opc, BranchProbability TW, - BranchProbability FW, bool InvertCond); + Instruction::BinaryOps Opc, BranchProbability TProb, + BranchProbability FProb, bool InvertCond); void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB, - BranchProbability TW, BranchProbability FW, + BranchProbability TProb, BranchProbability FProb, bool InvertCond); bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); @@ -774,11 +790,11 @@ public: }; /// Lower \p SLI into a STATEPOINT instruction. - SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SLI); + SDValue LowerAsSTATEPOINT(StatepointLoweringInfo &SI); // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. - void LowerStatepoint(ImmutableStatepoint Statepoint, + void LowerStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB = nullptr); void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee, @@ -838,7 +854,7 @@ private: void visitInvoke(const InvokeInst &I); void visitResume(const ResumeInst &I); - void visitBinary(const User &I, unsigned OpCode); + void visitBinary(const User &I, unsigned Opcode); void visitShift(const User &I, unsigned Opcode); void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } @@ -881,7 +897,7 @@ private: void visitExtractValue(const User &I); void visitInsertValue(const User &I); - void visitLandingPad(const LandingPadInst &I); + void visitLandingPad(const LandingPadInst &LP); void visitGetElementPtr(const User &I); void visitSelect(const User &I); @@ -926,7 +942,7 @@ private: const BasicBlock *EHPadBB = nullptr); // These two are implemented in StatepointLowering.cpp - void visitGCRelocate(const GCRelocateInst &I); + void visitGCRelocate(const GCRelocateInst &Relocate); void visitGCResult(const GCResultInst &I); void visitVectorReduce(const CallInst &I, unsigned Intrinsic); @@ -1036,9 +1052,17 @@ struct RegsForValue { /// Add this value to the specified inlineasm node operand list. This adds the /// code marker, matching input operand index (if applicable), and includes /// the number of values added into it.
- void AddInlineAsmOperands(unsigned Kind, bool HasMatching, + void AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const; + + /// Check if the total RegCount is greater than one. + bool occupiesMultipleRegs() const { + return std::accumulate(RegCount.begin(), RegCount.end(), 0) > 1; + } + + /// Return a list of registers and their sizes. + SmallVector<std::pair<unsigned, unsigned>, 4> getRegsAndSizes() const; }; } // end namespace llvm diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index dd30dc16378c..fa341e8b5fa5 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetInstrInfo.h" @@ -28,18 +27,21 @@ #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/Config/llvm-config.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Function.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/ModuleSlotTracker.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -85,6 +87,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_CLR: return "AtomicLoadClr"; case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; @@ -176,20 +179,30 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FMAXNAN: return "fmaxnan"; case ISD::FNEG: return "fneg"; case ISD::FSQRT: return "fsqrt"; + case ISD::STRICT_FSQRT: return "strict_fsqrt"; case ISD::FSIN: return "fsin"; + case ISD::STRICT_FSIN: return "strict_fsin"; case ISD::FCOS: return "fcos"; + case ISD::STRICT_FCOS: return "strict_fcos"; case ISD::FSINCOS: return "fsincos"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; + case ISD::STRICT_FRINT: return "strict_frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; + case ISD::STRICT_FEXP: return "strict_fexp"; case ISD::FEXP2: return "fexp2"; + case ISD::STRICT_FEXP2: return "strict_fexp2"; case ISD::FLOG: return "flog"; + case ISD::STRICT_FLOG: return "strict_flog"; case ISD::FLOG2: return "flog2"; + case ISD::STRICT_FLOG2: return "strict_flog2"; case ISD::FLOG10: return "flog10"; + case ISD::STRICT_FLOG10: return "strict_flog10"; // Binary operators case ISD::ADD: return 
"add"; @@ -214,24 +227,31 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ROTL: return "rotl"; case ISD::ROTR: return "rotr"; case ISD::FADD: return "fadd"; + case ISD::STRICT_FADD: return "strict_fadd"; case ISD::FSUB: return "fsub"; + case ISD::STRICT_FSUB: return "strict_fsub"; case ISD::FMUL: return "fmul"; + case ISD::STRICT_FMUL: return "strict_fmul"; case ISD::FDIV: return "fdiv"; + case ISD::STRICT_FDIV: return "strict_fdiv"; case ISD::FMA: return "fma"; + case ISD::STRICT_FMA: return "strict_fma"; case ISD::FMAD: return "fmad"; case ISD::FREM: return "frem"; + case ISD::STRICT_FREM: return "strict_frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; case ISD::FCANONICALIZE: return "fcanonicalize"; case ISD::FPOW: return "fpow"; + case ISD::STRICT_FPOW: return "strict_fpow"; case ISD::SMIN: return "smin"; case ISD::SMAX: return "smax"; case ISD::UMIN: return "umin"; case ISD::UMAX: return "umax"; case ISD::FPOWI: return "fpowi"; + case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; - case ISD::SETCCE: return "setcce"; case ISD::SETCCCARRY: return "setcccarry"; case ISD::SELECT: return "select"; case ISD::VSELECT: return "vselect"; @@ -366,7 +386,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SETFALSE2: return "setfalse2"; } case ISD::VECREDUCE_FADD: return "vecreduce_fadd"; + case ISD::VECREDUCE_STRICT_FADD: return "vecreduce_strict_fadd"; case ISD::VECREDUCE_FMUL: return "vecreduce_fmul"; + case ISD::VECREDUCE_STRICT_FMUL: return "vecreduce_strict_fmul"; case ISD::VECREDUCE_ADD: return "vecreduce_add"; case ISD::VECREDUCE_MUL: return "vecreduce_mul"; case ISD::VECREDUCE_AND: return "vecreduce_and"; @@ -401,6 +423,32 @@ static Printable PrintNodeId(const SDNode &Node) { }); } +// Print the MMO with more information from the SelectionDAG. 
+static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, + const MachineFunction *MF, const Module *M, + const MachineFrameInfo *MFI, + const TargetInstrInfo *TII, LLVMContext &Ctx) { + ModuleSlotTracker MST(M); + if (MF) + MST.incorporateFunction(MF->getFunction()); + SmallVector<StringRef, 0> SSNs; + MMO.print(OS, MST, SSNs, Ctx, MFI, TII); +} + +static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, + const SelectionDAG *G) { + if (G) { + const MachineFunction *MF = &G->getMachineFunction(); + return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(), + &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(), + *G->getContext()); + } else { + LLVMContext Ctx; + return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, + /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); + } +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void SDNode::dump() const { dump(nullptr); } @@ -430,9 +478,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasExact()) OS << " exact"; - if (getFlags().hasUnsafeAlgebra()) - OS << " unsafe"; - if (getFlags().hasNoNaNs()) OS << " nnan"; @@ -448,6 +493,12 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasAllowContract()) OS << " contract"; + if (getFlags().hasApproximateFuncs()) + OS << " afn"; + + if (getFlags().hasAllowReassociation()) + OS << " reassoc"; + if (getFlags().hasVectorReduction()) OS << " vector-reduction"; @@ -457,7 +508,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << "Mem:"; for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), e = MN->memoperands_end(); i != e; ++i) { - OS << **i; + printMemOperand(OS, **i, G); if (std::next(i) != e) OS << " "; } @@ -549,7 +600,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ":" << N->getVT().getEVTString(); } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { - OS << "<" << *LD->getMemOperand(); + OS << "<"; + + printMemOperand(OS, *LD->getMemOperand(), G); bool doExt = true; switch (LD->getExtensionType()) { @@ -567,7 +620,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ">"; } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { - OS << "<" << *ST->getMemOperand(); + OS << "<"; + printMemOperand(OS, *ST->getMemOperand(), G); if (ST->isTruncatingStore()) OS << ", trunc to " << ST->getMemoryVT().getEVTString(); @@ -578,7 +632,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ">"; } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { - OS << "<" << *M->getMemOperand() << ">"; + OS << "<"; + printMemOperand(OS, *M->getMemOperand(), G); + OS << ">"; } else if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(this)) { int64_t offset = BA->getOffset(); @@ -608,6 +664,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getNodeId() != -1) OS << " [ID=" << getNodeId() << ']'; + if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this)))) + OS << "# D:" << isDivergent(); if (!G) return; @@ -779,4 +837,8 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { if (i) OS << ", "; else OS << " "; printOperand(OS, G, getOperand(i)); } + if (DebugLoc DL = getDebugLoc()) { + OS << ", "; + DL.print(OS); + } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 
d13ccc263718..f7bd8847bee3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -29,6 +29,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -43,7 +44,6 @@ #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -82,6 +82,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -196,7 +197,7 @@ defaultListDAGScheduler("default", "Best scheduler for the target", namespace llvm { //===--------------------------------------------------------------------===// - /// \brief This class is used by SelectionDAGISel to temporarily override + /// This class is used by SelectionDAGISel to temporarily override /// the optimization level on a per-function basis. class OptLevelChanger { SelectionDAGISel &IS; @@ -211,26 +212,27 @@ namespace llvm { return; IS.OptLevel = NewOptLevel; IS.TM.setOptLevel(NewOptLevel); - DEBUG(dbgs() << "\nChanging optimization level for Function " - << IS.MF->getFunction().getName() << "\n"); - DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel - << " ; After: -O" << NewOptLevel << "\n"); + LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction().getName() << "\n"); + LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" + << NewOptLevel << "\n"); SavedFastISel = IS.TM.Options.EnableFastISel; if (NewOptLevel == CodeGenOpt::None) { IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); - DEBUG(dbgs() << "\tFastISel is " - << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") - << "\n"); + LLVM_DEBUG( + dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? 
"enabled" : "disabled") + << "\n"); } } ~OptLevelChanger() { if (IS.OptLevel == SavedOptLevel) return; - DEBUG(dbgs() << "\nRestoring optimization level for Function " - << IS.MF->getFunction().getName() << "\n"); - DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel - << " ; After: -O" << SavedOptLevel << "\n"); + LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction().getName() << "\n"); + LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" + << SavedOptLevel << "\n"); IS.OptLevel = SavedOptLevel; IS.TM.setOptLevel(SavedOptLevel); IS.TM.setFastISel(SavedFastISel); @@ -326,9 +328,9 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); AU.addRequired<StackProtector>(); - AU.addPreserved<StackProtector>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); MachineFunctionPass::getAnalysisUsage(AU); @@ -410,11 +412,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; - DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast<Function &>(Fn), DT, LI); - CurDAG->init(*MF, *ORE, this); + CurDAG->init(*MF, *ORE, this, LibInfo, + getAnalysisIfAvailable<DivergenceAnalysis>()); FuncInfo->set(Fn, *MF, CurDAG); // Now get the optional analyzes if we want to. @@ -513,8 +516,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // FIXME: VR def may not be in entry block. Def->getParent()->insert(std::next(InsertPos), MI); } else - DEBUG(dbgs() << "Dropping debug info for dead vreg" - << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg" + << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -621,8 +624,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // at this point. FuncInfo->clear(); - DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); - DEBUG(MF->print(dbgs())); + LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); + LLVM_DEBUG(MF->print(dbgs())); return true; } @@ -711,6 +714,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { int BlockNumber = -1; (void)BlockNumber; bool MatchFilterBB = false; (void)MatchFilterBB; + TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*FuncInfo->Fn); // Pre-type legalization allow creation of any node types. 
CurDAG->NewNodesMustHaveLegalTypes = false; @@ -718,7 +723,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG MatchFilterBB = (FilterDAGBasicBlockName.empty() || FilterDAGBasicBlockName == - FuncInfo->MBB->getBasicBlock()->getName().str()); + FuncInfo->MBB->getBasicBlock()->getName()); #endif #ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || @@ -730,9 +735,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { BlockName = (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } - DEBUG(dbgs() << "Initial selection DAG: " << printMBBReference(*FuncInfo->MBB) - << " '" << BlockName << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Initial selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombine1 && MatchFilterBB) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -744,10 +750,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. @@ -761,10 +770,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); // Only allow creation of legal node types. 
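Note the pattern the patch repeats after each combine/legalize phase below: when the target reports divergent control flow, the DAG's divergence bookkeeping is re-verified. A hedged sketch of that guard factored into a helper; the helper name is hypothetical, while VerifyDAGDiverence is the spelling the patch itself uses:

    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;

    // Hypothetical wrapper for the guard inserted after each DAG phase.
    static void verifyDivergenceIfNeeded(SelectionDAG &DAG,
                                         const TargetTransformInfo &TTI) {
      // Divergence tracking only matters for targets with divergent branches
      // (e.g. GPUs); skip the whole-DAG walk everywhere else.
      if (TTI.hasBranchDivergence())
        DAG.VerifyDAGDiverence();
    }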
CurDAG->NewNodesMustHaveLegalTypes = true; @@ -780,10 +792,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); } { @@ -793,10 +808,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } if (Changed) { - DEBUG(dbgs() << "Vector-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); { NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName, @@ -804,10 +819,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Vector/type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombineLT && MatchFilterBB) CurDAG->viewGraph("dag-combine-lv input for " + BlockName); @@ -819,10 +834,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); + + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); } if (ViewLegalizeDAGs && MatchFilterBB) @@ -834,10 +852,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } - DEBUG(dbgs() << "Legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewDAGCombine2 && MatchFilterBB) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -849,10 +870,13 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + if (TTI.hasBranchDivergence()) + CurDAG->VerifyDAGDiverence(); + + LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -868,10 +892,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + LLVM_DEBUG(dbgs() << "Selected selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input 
for " + BlockName); @@ -937,10 +961,62 @@ public: } // end anonymous namespace +// This function is used to enforce the topological node id property +// property leveraged during Instruction selection. Before selection all +// nodes are given a non-negative id such that all nodes have a larger id than +// their operands. As this holds transitively we can prune checks that a node N +// is a predecessor of M another by not recursively checking through M's +// operands if N's ID is larger than M's ID. This is significantly improves +// performance of for various legality checks (e.g. IsLegalToFold / +// UpdateChains). + +// However, when we fuse multiple nodes into a single node +// during selection we may induce a predecessor relationship between inputs and +// outputs of distinct nodes being merged violating the topological property. +// Should a fused node have a successor which has yet to be selected, our +// legality checks would be incorrect. To avoid this we mark all unselected +// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x => +// (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M. +// We use bit-negation to more clearly enforce that node id -1 can only be +// achieved by selected nodes). As the conversion is reversable the original Id, +// topological pruning can still be leveraged when looking for unselected nodes. +// This method is call internally in all ISel replacement calls. +void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) { + SmallVector<SDNode *, 4> Nodes; + Nodes.push_back(Node); + + while (!Nodes.empty()) { + SDNode *N = Nodes.pop_back_val(); + for (auto *U : N->uses()) { + auto UId = U->getNodeId(); + if (UId > 0) { + InvalidateNodeId(U); + Nodes.push_back(U); + } + } + } +} + +// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a +// NodeId with the equivalent node id which is invalid for topological +// pruning. +void SelectionDAGISel::InvalidateNodeId(SDNode *N) { + int InvalidId = -(N->getNodeId() + 1); + N->setNodeId(InvalidId); +} + +// getUninvalidatedNodeId - get original uninvalidated node id. +int SelectionDAGISel::getUninvalidatedNodeId(SDNode *N) { + int Id = N->getNodeId(); + if (Id < -1) + return -(Id + 1); + return Id; +} + void SelectionDAGISel::DoInstructionSelection() { - DEBUG(dbgs() << "===== Instruction selection begins: " - << printMBBReference(*FuncInfo->MBB) << " '" - << FuncInfo->MBB->getName() << "'\n"); + LLVM_DEBUG(dbgs() << "===== Instruction selection begins: " + << printMBBReference(*FuncInfo->MBB) << " '" + << FuncInfo->MBB->getName() << "'\n"); PreprocessISelDAG(); @@ -972,6 +1048,33 @@ void SelectionDAGISel::DoInstructionSelection() { if (Node->use_empty()) continue; +#ifndef NDEBUG + SmallVector<SDNode *, 4> Nodes; + Nodes.push_back(Node); + + while (!Nodes.empty()) { + auto N = Nodes.pop_back_val(); + if (N->getOpcode() == ISD::TokenFactor || N->getNodeId() < 0) + continue; + for (const SDValue &Op : N->op_values()) { + if (Op->getOpcode() == ISD::TokenFactor) + Nodes.push_back(Op.getNode()); + else { + // We rely on topological ordering of node ids for checking for + // cycles when fusing nodes during selection. All unselected nodes + // successors of an already selected node should have a negative id. + // This assertion will catch such cases. If this assertion triggers + // it is likely you using DAG-level Value/Node replacement functions + // (versus equivalent ISEL replacement) in backend-specific + // selections. 
See comment in EnforceNodeIdInvariant for more + // details. + assert(Op->getNodeId() != -1 && + "Node has already selected predecessor node"); + } + } + } +#endif + // When we are using non-default rounding modes or FP exception behavior // FP operations are represented by StrictFP pseudo-operations. They // need to be simplified here so that the target-specific instruction @@ -985,13 +1088,16 @@ void SelectionDAGISel::DoInstructionSelection() { if (Node->isStrictFPOpcode()) Node = CurDAG->mutateStrictFPToFP(Node); + LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: "; + Node->dump(CurDAG)); + Select(Node); } CurDAG->setRoot(Dummy.getValue()); } - DEBUG(dbgs() << "===== Instruction selection ends:\n"); + LLVM_DEBUG(dbgs() << "\n===== Instruction selection ends:\n"); PostprocessISelDAG(); } @@ -1264,7 +1370,7 @@ static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) { } auto DLoc = isa<Instruction>(SwiftErrorVal) - ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc() + ? cast<Instruction>(SwiftErrorVal)->getDebugLoc() : DebugLoc(); const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo(); @@ -1380,8 +1486,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. FastISel *FastIS = nullptr; - if (TM.Options.EnableFastISel) + if (TM.Options.EnableFastISel) { + LLVM_DEBUG(dbgs() << "Enabling fast-isel\n"); FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + } setupSwiftErrorVals(Fn, TLI, FuncInfo); @@ -1396,6 +1504,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FuncInfo->MBB = FuncInfo->MBBMap[&Fn.getEntryBlock()]; FuncInfo->InsertPt = FuncInfo->MBB->begin(); + CurDAG->setFunctionLoweringInfo(FuncInfo); + if (!FastIS) { LowerArguments(Fn); } else { @@ -1433,6 +1543,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { processDbgDeclares(FuncInfo); // Iterate over all basic blocks in the function. 
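The x => -(x+1) encoding described above is worth a worked example: it maps every strictly positive id into the range below -1, so it never collides with the -1 "already selected" marker, and it is trivially reversible. A standalone sketch (plain C++, no LLVM types) mirroring InvalidateNodeId/getUninvalidatedNodeId; note that EnforceNodeIdInvariant only invalidates ids strictly greater than 0, which the loop bound reflects:

    #include <cassert>

    // Mirrors InvalidateNodeId: mark a positive topological id as invalid.
    static int invalidate(int Id) { return -(Id + 1); }

    // Mirrors getUninvalidatedNodeId: recover the original id.
    static int uninvalidated(int Id) { return Id < -1 ? -(Id + 1) : Id; }

    int main() {
      for (int Id = 1; Id <= 1000; ++Id) { // ids 0 and -1 are never negated
        int Inv = invalidate(Id);
        assert(Inv < -1 && "stays clear of the -1 'selected' marker");
        assert(uninvalidated(Inv) == Id && "encoding is reversible");
      }
      return 0;
    }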
+ StackProtector &SP = getAnalysis<StackProtector>(); for (const BasicBlock *LLVMBB : RPOT) { if (OptLevel != CodeGenOpt::None) { bool AllPredsVisited = true; @@ -1445,13 +1556,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (AllPredsVisited) { - for (BasicBlock::const_iterator I = LLVMBB->begin(); - const PHINode *PN = dyn_cast<PHINode>(I); ++I) - FuncInfo->ComputePHILiveOutRegInfo(PN); + for (const PHINode &PN : LLVMBB->phis()) + FuncInfo->ComputePHILiveOutRegInfo(&PN); } else { - for (BasicBlock::const_iterator I = LLVMBB->begin(); - const PHINode *PN = dyn_cast<PHINode>(I); ++I) - FuncInfo->InvalidatePHILiveOutRegInfo(PN); + for (const PHINode &PN : LLVMBB->phis()) + FuncInfo->InvalidatePHILiveOutRegInfo(&PN); } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -1604,7 +1713,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->recomputeInsertPt(); } - if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) { + if (SP.shouldEmitSDCheck(*LLVMBB)) { bool FunctionBasedInstrumentation = TLI->getSSPStackGuardCheck(*Fn.getParent()); SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB], @@ -1630,11 +1739,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS->removeDeadCode(FuncInfo->InsertPt, FuncInfo->MBB->end()); } + if (FastIS) + FastIS->finishBasicBlock(); FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); ElidedArgCopyInstrs.clear(); } + SP.copyToMachineFrameInfo(MF->getFrameInfo()); + propagateSwiftErrorVRegs(FuncInfo); delete FastIS; @@ -1728,12 +1841,12 @@ FindSplitPointForStackProtector(MachineBasicBlock *BB) { void SelectionDAGISel::FinishBasicBlock() { - DEBUG(dbgs() << "Total amount of phi nodes to update: " - << FuncInfo->PHINodesToUpdate.size() << "\n"; - for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) - dbgs() << "Node " << i << " : (" - << FuncInfo->PHINodesToUpdate[i].first - << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: " + << FuncInfo->PHINodesToUpdate.size() << "\n"; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; + ++i) dbgs() + << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first + << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. @@ -2012,7 +2125,7 @@ bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, return true; // If the actual AND mask is allowing unallowed bits, this doesn't match. - if (ActualMask.intersects(~DesiredMask)) + if (!ActualMask.isSubsetOf(DesiredMask)) return false; // Otherwise, the DAG Combiner may have proven that the value coming in is @@ -2041,7 +2154,7 @@ bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, return true; // If the actual AND mask is allowing unallowed bits, this doesn't match. - if (ActualMask.intersects(~DesiredMask)) + if (!ActualMask.isSubsetOf(DesiredMask)) return false; // Otherwise, the DAG Combiner may have proven that the value coming in is @@ -2134,52 +2247,44 @@ static SDNode *findGlueUse(SDNode *N) { return nullptr; } -/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". -/// This function iteratively traverses up the operand chain, ignoring -/// certain nodes. 
-static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, - SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited, +/// findNonImmUse - Return true if "Def" is a predecessor of "Root" via a path +/// beyond "ImmedUse". We may ignore chains as they are checked separately. +static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, bool IgnoreChains) { - // The NodeID's are given uniques ID's where a node ID is guaranteed to be - // greater than all of its (recursive) operands. If we scan to a point where - // 'use' is smaller than the node we're scanning for, then we know we will - // never find it. - // - // The Use may be -1 (unassigned) if it is a newly allocated node. This can - // happen because we scan down to newly selected nodes in the case of glue - // uses. - std::vector<SDNode *> WorkList; - WorkList.push_back(Use); - - while (!WorkList.empty()) { - Use = WorkList.back(); - WorkList.pop_back(); - if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1) - continue; + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 16> WorkList; + // Only check if we have non-immediate uses of Def. + if (ImmedUse->isOnlyUserOf(Def)) + return false; - // Don't revisit nodes if we already scanned it and didn't fail, we know we - // won't fail if we scan it again. - if (!Visited.insert(Use).second) + // We don't care about paths to Def that go through ImmedUse so mark it + // visited and mark non-def operands as used. + Visited.insert(ImmedUse); + for (const SDValue &Op : ImmedUse->op_values()) { + SDNode *N = Op.getNode(); + // Ignore chain deps (they are validated by + // HandleMergeInputChains) and immediate uses. + if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) continue; + if (!Visited.insert(N).second) + continue; + WorkList.push_back(N); + } - for (const SDValue &Op : Use->op_values()) { - // Ignore chain uses, they are validated by HandleMergeInputChains. - if (Op.getValueType() == MVT::Other && IgnoreChains) - continue; - + // Initialize worklist to operands of Root. + if (Root != ImmedUse) { + for (const SDValue &Op : Root->op_values()) { SDNode *N = Op.getNode(); - if (N == Def) { - if (Use == ImmedUse || Use == Root) - continue; // We are not looking for immediate use. - assert(N != Root); - return true; - } - - // Traverse up the operand chain. + // Ignore chains (they are validated by HandleMergeInputChains) + if ((Op.getValueType() == MVT::Other && IgnoreChains) || N == Def) + continue; + if (!Visited.insert(N).second) + continue; WorkList.push_back(N); } } - return false; + + return SDNode::hasPredecessorHelper(Def, Visited, WorkList, 0, true); } /// IsProfitableToFold - Returns true if it's profitable to fold the specific @@ -2199,7 +2304,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // If Root use can somehow reach N through a path that doesn't contain // U then folding N would create a cycle. e.g. In the following - // diagram, Root can reach N through X. If N is folded into into Root, then + // diagram, Root can reach N through X. If N is folded into Root, then // X is both a predecessor and a successor of U. // // [N*] // @@ -2251,13 +2356,12 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, // If our query node has a glue result with a use, we've walked up it. If // the user (which has already been selected) has a chain or indirectly uses - // the chain, our WalkChainUsers predicate will not consider it.
Because of + // the chain, HandleMergeInputChains will not consider it. Because of // this, we cannot ignore chains in this predicate. IgnoreChains = false; } - SmallPtrSet<SDNode*, 16> Visited; - return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); + return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); } void SelectionDAGISel::Select_INLINEASM(SDNode *N) { @@ -2360,7 +2464,8 @@ void SelectionDAGISel::UpdateChains( std::replace(ChainNodesMatched.begin(), ChainNodesMatched.end(), N, static_cast<SDNode *>(nullptr)); }); - CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); + if (ChainNode->getOpcode() != ISD::TokenFactor) + ReplaceUses(ChainVal, InputChain); // If the node became dead and we haven't already seen it, delete it. if (ChainNode != NodeToMatch && ChainNode->use_empty() && @@ -2372,144 +2477,7 @@ void SelectionDAGISel::UpdateChains( if (!NowDeadNodes.empty()) CurDAG->RemoveDeadNodes(NowDeadNodes); - DEBUG(dbgs() << "ISEL: Match complete!\n"); -} - -enum ChainResult { - CR_Simple, - CR_InducesCycle, - CR_LeadsToInteriorNode -}; - -/// WalkChainUsers - Walk down the users of the specified chained node that is -/// part of the pattern we're matching, looking at all of the users we find. -/// This determines whether something is an interior node, whether we have a -/// non-pattern node in between two pattern nodes (which prevent folding because -/// it would induce a cycle) and whether we have a TokenFactor node sandwiched -/// between pattern nodes (in which case the TF becomes part of the pattern). -/// -/// The walk we do here is guaranteed to be small because we quickly get down to -/// already selected nodes "below" us. -static ChainResult -WalkChainUsers(const SDNode *ChainedNode, - SmallVectorImpl<SDNode *> &ChainedNodesInPattern, - DenseMap<const SDNode *, ChainResult> &TokenFactorResult, - SmallVectorImpl<SDNode *> &InteriorChainedNodes) { - ChainResult Result = CR_Simple; - - for (SDNode::use_iterator UI = ChainedNode->use_begin(), - E = ChainedNode->use_end(); UI != E; ++UI) { - // Make sure the use is of the chain, not some other value we produce. - if (UI.getUse().getValueType() != MVT::Other) continue; - - SDNode *User = *UI; - - if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. - continue; - - // If we see an already-selected machine node, then we've gone beyond the - // pattern that we're selecting down into the already selected chunk of the - // DAG. - unsigned UserOpcode = User->getOpcode(); - if (User->isMachineOpcode() || - UserOpcode == ISD::CopyToReg || - UserOpcode == ISD::CopyFromReg || - UserOpcode == ISD::INLINEASM || - UserOpcode == ISD::EH_LABEL || - UserOpcode == ISD::LIFETIME_START || - UserOpcode == ISD::LIFETIME_END) { - // If their node ID got reset to -1 then they've already been selected. - // Treat them like a MachineOpcode. - if (User->getNodeId() == -1) - continue; - } - - // If we have a TokenFactor, we handle it specially. - if (User->getOpcode() != ISD::TokenFactor) { - // If the node isn't a token factor and isn't part of our pattern, then it - // must be a random chained node in between two nodes we're selecting. - // This happens when we have something like: - // x = load ptr - // call - // y = x+4 - // store y -> ptr - // Because we structurally match the load/store as a read/modify/write, - // but the call is chained between them. We cannot fold in this case - // because it would induce a cycle in the graph. 
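The legality problem both the old and new code solve is the one in the diagram above: folding N into Root is illegal if Root can reach N's def along a path that bypasses the immediate use, since the fused node would then be its own (indirect) predecessor. A toy sketch of the reachability walk with the topological-id pruning described earlier; the types are toy stand-ins, not LLVM's:

    #include <unordered_set>
    #include <vector>

    struct ToyNode {
      int Id;                     // topological id: larger than any operand's
      std::vector<ToyNode *> Ops; // operands, i.e. predecessors
    };

    // Returns true if Def is reachable from Start by walking operands.
    // Nodes with a smaller id than Def can be pruned: by the invariant,
    // none of their (transitive) operands can be Def.
    static bool reaches(ToyNode *Start, const ToyNode *Def) {
      std::unordered_set<ToyNode *> Visited;
      std::vector<ToyNode *> Worklist{Start};
      while (!Worklist.empty()) {
        ToyNode *N = Worklist.back();
        Worklist.pop_back();
        if (N == Def)
          return true;
        if (N->Id < Def->Id || !Visited.insert(N).second)
          continue; // pruned by the id invariant, or already visited
        for (ToyNode *Op : N->Ops)
          Worklist.push_back(Op);
      }
      return false;
    }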
- if (!std::count(ChainedNodesInPattern.begin(), - ChainedNodesInPattern.end(), User)) - return CR_InducesCycle; - - // Otherwise we found a node that is part of our pattern. For example in: - // x = load ptr - // y = x+4 - // store y -> ptr - // This would happen when we're scanning down from the load and see the - // store as a user. Record that there is a use of ChainedNode that is - // part of the pattern and keep scanning uses. - Result = CR_LeadsToInteriorNode; - InteriorChainedNodes.push_back(User); - continue; - } - - // If we found a TokenFactor, there are two cases to consider: first if the - // TokenFactor is just hanging "below" the pattern we're matching (i.e. no - // uses of the TF are in our pattern) we just want to ignore it. Second, - // the TokenFactor can be sandwiched in between two chained nodes, like so: - // [Load chain] - // ^ - // | - // [Load] - // ^ ^ - // | \ DAG's like cheese - // / \ do you? - // / | - // [TokenFactor] [Op] - // ^ ^ - // | | - // \ / - // \ / - // [Store] - // - // In this case, the TokenFactor becomes part of our match and we rewrite it - // as a new TokenFactor. - // - // To distinguish these two cases, do a recursive walk down the uses. - auto MemoizeResult = TokenFactorResult.find(User); - bool Visited = MemoizeResult != TokenFactorResult.end(); - // Recursively walk chain users only if the result is not memoized. - if (!Visited) { - auto Res = WalkChainUsers(User, ChainedNodesInPattern, TokenFactorResult, - InteriorChainedNodes); - MemoizeResult = TokenFactorResult.insert(std::make_pair(User, Res)).first; - } - switch (MemoizeResult->second) { - case CR_Simple: - // If the uses of the TokenFactor are just already-selected nodes, ignore - // it, it is "below" our pattern. - continue; - case CR_InducesCycle: - // If the uses of the TokenFactor lead to nodes that are not part of our - // pattern that are not selected, folding would turn this into a cycle, - // bail out now. - return CR_InducesCycle; - case CR_LeadsToInteriorNode: - break; // Otherwise, keep processing. - } - - // Okay, we know we're in the interesting interior case. The TokenFactor - // is now going to be considered part of the pattern so that we rewrite its - // uses (it may have uses that are not part of the pattern) with the - // ultimate chain result of the generated code. We will also add its chain - // inputs as inputs to the ultimate TokenFactor we create. - Result = CR_LeadsToInteriorNode; - if (!Visited) { - ChainedNodesInPattern.push_back(User); - InteriorChainedNodes.push_back(User); - } - } - - return Result; + LLVM_DEBUG(dbgs() << "ISEL: Match complete!\n"); } /// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains @@ -2521,47 +2489,56 @@ WalkChainUsers(const SDNode *ChainedNode, static SDValue HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched, SelectionDAG *CurDAG) { - // Used for memoization. Without it WalkChainUsers could take exponential - // time to run. - DenseMap<const SDNode *, ChainResult> TokenFactorResult; - // Walk all of the chained nodes we've matched, recursively scanning down the - // users of the chain result. This adds any TokenFactor nodes that are caught - // in between chained nodes to the chained and interior nodes list. - SmallVector<SDNode*, 3> InteriorChainedNodes; - for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { - if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, - TokenFactorResult, - InteriorChainedNodes) == CR_InducesCycle) - return SDValue(); // Would induce a cycle. 
- } - // Okay, we have walked all the matched nodes and collected TokenFactor nodes - // that we are interested in. Form our input TokenFactor node. + SmallPtrSet<const SDNode *, 16> Visited; + SmallVector<const SDNode *, 8> Worklist; SmallVector<SDValue, 3> InputChains; - for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { - // Add the input chain of this node to the InputChains list (which will be - // the operands of the generated TokenFactor) if it's not an interior node. - SDNode *N = ChainNodesMatched[i]; - if (N->getOpcode() != ISD::TokenFactor) { - if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) - continue; + unsigned int Max = 8192; - // Otherwise, add the input chain. - SDValue InChain = ChainNodesMatched[i]->getOperand(0); - assert(InChain.getValueType() == MVT::Other && "Not a chain"); - InputChains.push_back(InChain); - continue; - } + // Quick exit on trivial merge. + if (ChainNodesMatched.size() == 1) + return ChainNodesMatched[0]->getOperand(0); - // If we have a token factor, we want to add all inputs of the token factor - // that are not part of the pattern we're matching. - for (const SDValue &Op : N->op_values()) { - if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), - Op.getNode())) - InputChains.push_back(Op); - } + // Add chains that aren't already added (internal). Peek through + // token factors. + std::function<void(const SDValue)> AddChains = [&](const SDValue V) { + if (V.getValueType() != MVT::Other) + return; + if (V->getOpcode() == ISD::EntryToken) + return; + if (!Visited.insert(V.getNode()).second) + return; + if (V->getOpcode() == ISD::TokenFactor) { + for (const SDValue &Op : V->op_values()) + AddChains(Op); + } else + InputChains.push_back(V); + }; + + for (auto *N : ChainNodesMatched) { + Worklist.push_back(N); + Visited.insert(N); } + while (!Worklist.empty()) + AddChains(Worklist.pop_back_val()->getOperand(0)); + + // Skip the search if there are no chain dependencies. + if (InputChains.size() == 0) + return CurDAG->getEntryNode(); + + // If one of these chains is a successor of input, we must have a + // node that is both the predecessor and successor of the + // to-be-merged nodes. Fail. + Visited.clear(); + for (SDValue V : InputChains) + Worklist.push_back(V.getNode()); + + for (auto *N : ChainNodesMatched) + if (SDNode::hasPredecessorHelper(N, Visited, Worklist, Max, true)) + return SDValue(); + + // Return merged chain. if (InputChains.size() == 1) return InputChains[0]; return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), @@ -2606,8 +2583,8 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Move the glue if needed. if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && (unsigned)OldGlueResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), - SDValue(Res, ResNumResults-1)); + ReplaceUses(SDValue(Node, OldGlueResultNo), + SDValue(Res, ResNumResults - 1)); if ((EmitNodeInfo & OPFL_GlueOutput) != 0) --ResNumResults; @@ -2615,14 +2592,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Move the chain reference if needed. if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && (unsigned)OldChainResultNo != ResNumResults-1) - CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), - SDValue(Res, ResNumResults-1)); + ReplaceUses(SDValue(Node, OldChainResultNo), + SDValue(Res, ResNumResults - 1)); // Otherwise, no replacement happened because the node already exists. 
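The rewritten HandleMergeInputChains above replaces the recursive WalkChainUsers classification with a simpler two-phase scheme: gather the distinct input chains (looking through TokenFactors), then reject the merge if any gathered chain is a successor of a matched node. A standalone sketch of the gathering phase only, using toy node types rather than SDValue:

    #include <functional>
    #include <unordered_set>
    #include <vector>

    struct ToyChain {
      bool IsTokenFactor = false; // joins several chains
      bool IsEntryToken = false;  // contributes no real dependency
      std::vector<ToyChain *> ChainOps;
    };

    // Collect the unique non-trivial chains feeding the matched nodes,
    // flattening TokenFactors so each of their inputs is considered alone.
    // Pre-seeding Visited with the matched nodes keeps internal edges
    // between matched nodes from being reported as inputs.
    static std::vector<ToyChain *>
    collectInputChains(const std::vector<ToyChain *> &Matched) {
      std::unordered_set<ToyChain *> Visited(Matched.begin(), Matched.end());
      std::vector<ToyChain *> Leaves;
      std::function<void(ToyChain *)> Add = [&](ToyChain *N) {
        if (!N || N->IsEntryToken || !Visited.insert(N).second)
          return; // entry token and repeats add nothing new
        if (N->IsTokenFactor)
          for (ToyChain *Op : N->ChainOps)
            Add(Op);
        else
          Leaves.push_back(N);
      };
      for (ToyChain *M : Matched)
        for (ToyChain *Op : M->ChainOps)
          Add(Op);
      return Leaves;
    }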
Replace // Uses of the old node with the new one. if (Res != Node) { - CurDAG->ReplaceAllUsesWith(Node, Res); - CurDAG->RemoveDeadNode(Node); + ReplaceNode(Node, Res); + } else { + EnforceNodeIdInvariant(Res); } return Res; @@ -2861,7 +2839,7 @@ struct MatchScope { bool HasChainNodesMatched; }; -/// \brief A DAG update listener to keep the matching state +/// A DAG update listener to keep the matching state /// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to /// change the DAG while matching. X86 addressing mode matcher is an example /// for this. @@ -2939,8 +2917,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, return; case ISD::AssertSext: case ISD::AssertZext: - CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), - NodeToMatch->getOperand(0)); + ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); CurDAG->RemoveDeadNode(NodeToMatch); return; case ISD::INLINEASM: @@ -2988,9 +2965,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // update the chain results when the pattern is complete. SmallVector<SDNode*, 3> ChainNodesMatched; - DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; - NodeToMatch->dump(CurDAG); - dbgs() << '\n'); + LLVM_DEBUG(dbgs() << "ISEL: Starting pattern match\n"); // Determine where to start the interpreter. Normally we start at opcode #0, // but if the state machine starts with an OPC_SwitchOpcode, then we @@ -3002,7 +2977,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // Already computed the OpcodeOffset table, just index into it. if (N.getOpcode() < OpcodeOffset.size()) MatcherIndex = OpcodeOffset[N.getOpcode()]; - DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); + LLVM_DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); } else if (MatcherTable[0] == OPC_SwitchOpcode) { // Otherwise, the table isn't computed, but the state machine does start @@ -3069,9 +3044,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (!Result) break; - DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at " - << "index " << MatcherIndexOfPredicate - << ", continuing at " << FailIndex << "\n"); + LLVM_DEBUG( + dbgs() << " Skipped scope entry (due to false predicate) at " + << "index " << MatcherIndexOfPredicate << ", continuing at " + << FailIndex << "\n"); ++NumDAGIselRetries; // Otherwise, we know that this case of the Scope is guaranteed to fail, @@ -3120,11 +3096,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (auto *MN = dyn_cast<MemSDNode>(N)) MatchedMemRefs.push_back(MN->getMemOperand()); else { - DEBUG( - dbgs() << "Expected MemSDNode "; - N->dump(CurDAG); - dbgs() << '\n' - ); + LLVM_DEBUG(dbgs() << "Expected MemSDNode "; N->dump(CurDAG); + dbgs() << '\n'); } continue; @@ -3245,8 +3218,8 @@ if (CaseSize == 0) break; // Otherwise, execute the case we found. - DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart - << " to " << MatcherIndex << "\n"); + LLVM_DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart << " to " + << MatcherIndex << "\n"); continue; } @@ -3277,8 +3250,9 @@ if (CaseSize == 0) break; // Otherwise, execute the case we found.
- DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() - << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); + LLVM_DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + << "] from " << SwitchStart << " to " << MatcherIndex + << '\n'); continue; } case OPC_CheckChild0Type: case OPC_CheckChild1Type: @@ -3658,16 +3632,11 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, Res->setMemRefs(MemRefs, MemRefs + NumMemRefs); } - DEBUG( - if (!MatchedMemRefs.empty() && Res->memoperands_empty()) - dbgs() << " Dropping mem operands\n"; - dbgs() << " " - << (IsMorphNodeTo ? "Morphed" : "Created") - << " node: "; - Res->dump(CurDAG); - - dbgs() << '\n'; - ); + LLVM_DEBUG(if (!MatchedMemRefs.empty() && Res->memoperands_empty()) dbgs() + << " Dropping mem operands\n"; + dbgs() << " " << (IsMorphNodeTo ? "Morphed" : "Created") + << " node: "; + Res->dump(CurDAG);); // If this was a MorphNodeTo then we're completely done! if (IsMorphNodeTo) { @@ -3702,7 +3671,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, NodeToMatch->getValueType(i).getSizeInBits() == Res.getValueSizeInBits()) && "invalid replacement"); - CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); + ReplaceUses(SDValue(NodeToMatch, i), Res); } // Update chain uses. @@ -3715,8 +3684,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (NodeToMatch->getValueType(NodeToMatch->getNumValues() - 1) == MVT::Glue && InputGlue.getNode()) - CurDAG->ReplaceAllUsesOfValueWith( - SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), InputGlue); + ReplaceUses(SDValue(NodeToMatch, NodeToMatch->getNumValues() - 1), + InputGlue); assert(NodeToMatch->use_empty() && "Didn't replace all uses of the node?"); @@ -3729,7 +3698,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // If the code reached this point, then the match failed. See if there is // another child to try in the current 'Scope', otherwise pop it until we // find a case to check. 
- DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + LLVM_DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex + << "\n"); ++NumDAGIselRetries; while (true) { if (MatchScopes.empty()) { @@ -3749,7 +3719,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); MatcherIndex = LastScope.FailIndex; - DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); + LLVM_DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); InputChain = LastScope.InputChain; InputGlue = LastScope.InputGlue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index be4ab094bf49..3b19bff4743d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -229,7 +229,7 @@ bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet if (level >= 20) { if (!printed) { printed = true; - DEBUG(dbgs() << "setSubgraphColor hit max level\n"); + LLVM_DEBUG(dbgs() << "setSubgraphColor hit max level\n"); } return true; } diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 3f64b49e3555..5cf06e62b80c 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -28,7 +28,6 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineValueType.h" #include "llvm/CodeGen/RuntimeLibcalls.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -43,6 +42,7 @@ #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/MachineValueType.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include <cassert> diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d76e52d78870..fa867fcec366 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -32,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <cctype> using namespace llvm; @@ -96,7 +96,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, return true; } -/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// Set CallLoweringInfo attribute flags based on a call instruction /// and called function attributes. void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) { @@ -524,6 +524,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // Other users may use these bits. 
+ EVT VT = Op.getValueType(); if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { // If not at the root, Just compute the Known bits to @@ -537,7 +538,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } else if (DemandedMask == 0) { // Not demanding any bits from Op. if (!Op.isUndef()) - return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); return false; } else if (Depth == 6) { // Limit search depth. return false; @@ -580,7 +581,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownBits LHSKnown; // Do not increment Depth here; that can cause an infinite loop. TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); - // If the LHS already has zeros where RHSC does, this and is dead. + // If the LHS already has zeros where RHSC does, this 'and' is dead. if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op0); @@ -596,8 +597,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 if (isBitwiseNot(Op0) && Op0.hasOneUse() && LHSKnown.One == ~RHSC->getAPIntValue()) { - SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), - Op0.getOperand(0), Op.getOperand(1)); + SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), + Op.getOperand(1)); return TLO.CombineTo(Op, Xor); } } @@ -618,7 +619,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. if (NewMask.isSubsetOf(Known.Zero | Known2.Zero)) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO)) return true; @@ -680,7 +681,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // (but not both) turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1))); @@ -696,7 +697,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // NB: it is okay if more bits are known than are requested if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side if (Known.One == Known2.One) { // set bits are the same on both sides - EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); @@ -710,7 +710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (C && !C->isAllOnesValue()) { if (NewMask.isSubsetOf(C->getAPIntValue())) { // We're flipping all demanded bits. Flip the undemanded bits too. - SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType()); + SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT); return TLO.CombineTo(Op, New); } // If we can't turn this into a 'not', try to shrink the constant. @@ -761,7 +761,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. 
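The xor-to-inclusive-or rewrite in this hunk ((A & C1) ^ (B & C2) -> (A & C1) | (B & C2) iff C1 & C2 == 0) holds because with disjoint masks no bit position can be set on both sides of the xor, and then xor and or agree bit-for-bit. A quick exhaustive check over i8; the particular mask values are arbitrary disjoint choices:

    #include <cassert>

    int main() {
      const unsigned C1 = 0xF0, C2 = 0x0F; // disjoint: C1 & C2 == 0
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B)
          // At most one side of ^ has any given bit set, so ^ and | agree.
          assert(((A & C1) ^ (B & C2)) == ((A & C1) | (B & C2)));
      return 0;
    }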
if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && - getBooleanContents(Op.getValueType()) == + getBooleanContents(VT) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! // FIXME: We're limiting to integer types here, but this should also work @@ -807,7 +807,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -835,8 +834,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getConstant(ShAmt, dl, ShTy)); return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), - NarrowShl)); + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); } // Repeat the SHL optimization above in cases where an extension // intervenes: (shl (anyext (shr x, c1)), c2) to @@ -854,7 +852,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, InnerOp.getOperand(0)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, @@ -904,7 +901,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -930,12 +926,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the shift amount is >= the size of the datatype, which is undefined. if (NewMask.isOneValue()) return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1))); + TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), + Op.getOperand(1))); if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { - EVT VT = Op.getValueType(); - // If the shift count is an invalid immediate, don't do anything. if (SA->getAPIntValue().uge(BitWidth)) break; @@ -1000,14 +994,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!AlreadySignExtended) { // Compute the correct shift amount type, which must be getShiftAmountTy // for scalar types after legalization. - EVT ShiftAmtTy = Op.getValueType(); + EVT ShiftAmtTy = VT; if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), InOp, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp, ShiftAmt)); } } @@ -1072,8 +1065,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If none of the top bits are demanded, convert this into an any_extend. if (NewMask.getActiveBits() <= OperandBitWidth) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op.getOperand(0))); APInt InMask = NewMask.trunc(OperandBitWidth); @@ -1089,8 +1081,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If none of the top bits are demanded, convert this into an any_extend. 
if (NewMask.getActiveBits() <= InBits) - return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op.getOperand(0))); // Since some of the sign extended bits are demanded, we know that the sign @@ -1107,8 +1098,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known zero, convert this to a zero extend. if (Known.isNonNegative()) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0))); break; } @@ -1139,8 +1129,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. - if (TLO.LegalTypes() && - !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is // undesirable. break; @@ -1150,8 +1139,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue Shift = In.getOperand(1); if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); - Shift = TLO.DAG.getConstant(ShVal, dl, - getShiftAmountTy(Op.getValueType(), DL)); + Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); } if (ShAmt->getZExtValue() < BitWidth) { @@ -1163,12 +1151,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!(HighBits & NewMask)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); } } @@ -1182,9 +1167,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::AssertZext: { // AssertZext demands all of the high bits, plus any of the low bits // demanded by its users. - EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - APInt InMask = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); + EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, Known, TLO, Depth+1)) return true; @@ -1196,40 +1180,45 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!TLO.LegalOperations() && - !Op.getValueType().isVector() && + if (!TLO.LegalOperations() && !VT.isVector() && !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignMask(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { - bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + if ((OpVTLegal || i32Legal) && VT.isSimple() && + Op.getOperand(0).getValueType() != MVT::f16 && Op.getOperand(0).getValueType() != MVT::f128) { // Cannot eliminate/lower SHL for f128 yet. - EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; + EVT Ty = OpVTLegal ? 
VT : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); unsigned OpVTSizeInBits = Op.getValueSizeInBits(); if (!OpVTLegal && OpVTSizeInBits > 32) - Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign); unsigned ShVal = Op.getValueSizeInBits() - 1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), - Sign, ShAmt)); + SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt)); } } + // If this is a bitcast, let computeKnownBits handle it. Only do this on a + // recursive call where Known may be useful to the caller. + if (Depth > 0) { + TLO.DAG.computeKnownBits(Op, Known, Depth); + return false; + } break; case ISD::ADD: case ISD::MUL: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. - APInt LoMask = APInt::getLowBitsSet(BitWidth, - BitWidth - NewMask.countLeadingZeros()); - if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) || - SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) || + SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + unsigned NewMaskLZ = NewMask.countLeadingZeros(); + APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ); + if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) || + SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { SDNodeFlags Flags = Op.getNode()->getFlags(); @@ -1238,13 +1227,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1), + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); return TLO.CombineTo(Op, NewOp); } return true; } + + // If we have a constant operand, we may be able to turn it into -1 if we + // do not demand the high bits. This can make the constant smaller to + // encode, allow more general folding, or match specialized instruction + // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that + // is probably not useful (and could be detrimental). + ConstantSDNode *C = isConstOrConstSplat(Op1); + APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ); + if (C && !C->isAllOnesValue() && !C->isOne() && + (C->getAPIntValue() | HighMask).isAllOnesValue()) { + SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); + // We can't guarantee that the new math op doesn't wrap, so explicitly + // clear those flags to prevent folding with a potential existing node + // that has those flags set. 
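The constant-to-minus-one canonicalization just above deserves a worked case: with only the low 4 bits of an i8 add demanded, the constant 0x0F satisfies (C | 0xF0) == 0xFF, so it can be replaced by -1 (0xFF) without changing any demanded bit; x + (-1) is x - 1, which is what patterns such as x & (x - 1) (x86 blsr) want to see. An exhaustive check of that one instance:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Demanded mask 0x0F: only the low nibble of the add feeds the result.
      for (unsigned X = 0; X < 256; ++X) {
        uint8_t WithC    = (uint8_t)(X + 0x0F) & 0x0F; // original constant
        uint8_t WithNeg1 = (uint8_t)(X + 0xFF) & 0x0F; // -1, i.e. X - 1
        assert(WithC == WithNeg1);
      }
      return 0;
    }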
+ SDNodeFlags Flags; + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); + return TLO.CombineTo(Op, NewOp); + } + LLVM_FALLTHROUGH; } default: @@ -1265,10 +1274,384 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (C->isOpaque()) return false; } - return TLO.CombineTo(Op, - TLO.DAG.getConstant(Known.One, dl, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); + } + + return false; +} + +bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, + const APInt &DemandedElts, + APInt &KnownUndef, + APInt &KnownZero, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + + bool Simplified = + SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); + if (Simplified) + DCI.CommitTargetLoweringOpt(TLO); + return Simplified; +} + +bool TargetLowering::SimplifyDemandedVectorElts( + SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, + APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, + bool AssumeSingleUse) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = DemandedEltMask; + unsigned NumElts = DemandedElts.getBitWidth(); + assert(VT.isVector() && "Expected vector op"); + assert(VT.getVectorNumElements() == NumElts && + "Mask size mismatches value type element count!"); + + KnownUndef = KnownZero = APInt::getNullValue(NumElts); + + // Undef operand. + if (Op.isUndef()) { + KnownUndef.setAllBits(); + return false; + } + + // If Op has other users, assume that all elements are needed. + if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) + DemandedElts.setAllBits(); + + // Not demanding any elements from Op. + if (DemandedElts == 0) { + KnownUndef.setAllBits(); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + } + + // Limit search depth. + if (Depth >= 6) + return false; + + SDLoc DL(Op); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + switch (Op.getOpcode()) { + case ISD::SCALAR_TO_VECTOR: { + if (!DemandedElts[0]) { + KnownUndef.setAllBits(); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + } + KnownUndef.setHighBits(NumElts - 1); + break; + } + case ISD::BITCAST: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // We only handle vectors here. + // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits? + if (!SrcVT.isVector()) + break; + + // Fast handling of 'identity' bitcasts. + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + if (NumSrcElts == NumElts) + return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1); + + APInt SrcZero, SrcUndef; + APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); + + // Bitcast from 'large element' src vector to 'small element' vector, we + // must demand a source element if any DemandedElt maps to it. + if ((NumElts % NumSrcElts) == 0) { + unsigned Scale = NumElts / NumSrcElts; + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SrcDemandedElts.setBit(i / Scale); + + if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, + TLO, Depth + 1)) + return true; + + // If the src element is zero/undef then all the output elements will be - + // only demanded elements are guaranteed to be correct. 
+ for (unsigned i = 0; i != NumSrcElts; ++i) { + if (SrcDemandedElts[i]) { + if (SrcZero[i]) + KnownZero.setBits(i * Scale, (i + 1) * Scale); + if (SrcUndef[i]) + KnownUndef.setBits(i * Scale, (i + 1) * Scale); + } + } + } + + // Bitcast from 'small element' src vector to 'large element' vector, we + // demand all smaller source elements covered by the larger demanded element + // of this vector. + if ((NumSrcElts % NumElts) == 0) { + unsigned Scale = NumSrcElts / NumElts; + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale); + + if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, + TLO, Depth + 1)) + return true; + + // If all the src elements covering an output element are zero/undef, then + // the output element will be as well, assuming it was demanded. + for (unsigned i = 0; i != NumElts; ++i) { + if (DemandedElts[i]) { + if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue()) + KnownZero.setBit(i); + if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue()) + KnownUndef.setBit(i); + } + } + } + break; + } + case ISD::BUILD_VECTOR: { + // Check all elements and simplify any unused elements with UNDEF. + if (!DemandedElts.isAllOnesValue()) { + // Don't simplify BROADCASTS. + if (llvm::any_of(Op->op_values(), + [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { + SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end()); + bool Updated = false; + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i] && !Ops[i].isUndef()) { + Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType()); + KnownUndef.setBit(i); + Updated = true; + } + } + if (Updated) + return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops)); + } + } + for (unsigned i = 0; i != NumElts; ++i) { + SDValue SrcOp = Op.getOperand(i); + if (SrcOp.isUndef()) { + KnownUndef.setBit(i); + } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() && + (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) { + KnownZero.setBit(i); + } + } + break; + } + case ISD::CONCAT_VECTORS: { + EVT SubVT = Op.getOperand(0).getValueType(); + unsigned NumSubVecs = Op.getNumOperands(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + for (unsigned i = 0; i != NumSubVecs; ++i) { + SDValue SubOp = Op.getOperand(i); + APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); + APInt SubUndef, SubZero; + if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO, + Depth + 1)) + return true; + KnownUndef.insertBits(SubUndef, i * NumSubElts); + KnownZero.insertBits(SubZero, i * NumSubElts); + } + break; + } + case ISD::INSERT_SUBVECTOR: { + if (!isa<ConstantSDNode>(Op.getOperand(2))) + break; + SDValue Base = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + EVT SubVT = Sub.getValueType(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue(); + if (Idx.uge(NumElts - NumSubElts)) + break; + unsigned SubIdx = Idx.getZExtValue(); + APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + APInt SubUndef, SubZero; + if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO, + Depth + 1)) + return true; + APInt BaseElts = DemandedElts; + BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, + Depth + 1)) + return true; + KnownUndef.insertBits(SubUndef, SubIdx); + KnownZero.insertBits(SubZero, SubIdx); + break; + } + case ISD::EXTRACT_SUBVECTOR: { 
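[Illustrative aside on the CONCAT_VECTORS case above: the demanded mask is simply sliced per operand. A minimal stand-in where shifts and masks do the job of APInt::extractBits; the 2x4-element shape is an assumption for the example.

    #include <cstdint>
    #include <cstdio>

    // Slice an 8-element demanded mask into one 4-element mask per
    // concatenated operand.
    int main() {
      const unsigned NumSubVecs = 2, NumSubElts = 4;
      uint64_t DemandedElts = 0x96; // 0b10010110: demand elts 1, 2, 4, 7
      for (unsigned i = 0; i != NumSubVecs; ++i) {
        uint64_t SubElts =
            (DemandedElts >> (i * NumSubElts)) & ((1ull << NumSubElts) - 1);
        std::printf("operand %u: 0x%llx\n", i, (unsigned long long)SubElts);
      }
      // Prints 0x6 for operand 0 and 0x9 for operand 1.
      return 0;
    }

INSERT_SUBVECTOR above is the inverse bookkeeping: the subvector's slice is extracted from the demanded mask, and the base vector is queried with that slice zeroed out.]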
+ if (!isa<ConstantSDNode>(Op.getOperand(1))) + break; + SDValue Src = Op.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); + if (Idx.uge(NumSrcElts - NumElts)) + break; + // Offset the demanded elts by the subvector index. + uint64_t SubIdx = Idx.getZExtValue(); + APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx); + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) + return true; + KnownUndef = SrcUndef.extractBits(NumElts, SubIdx); + KnownZero = SrcZero.extractBits(NumElts, SubIdx); + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue Vec = Op.getOperand(0); + SDValue Scl = Op.getOperand(1); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + + // For a legal, constant insertion index, if we don't need this insertion + // then strip it, else remove it from the demanded elts. + if (CIdx && CIdx->getAPIntValue().ult(NumElts)) { + unsigned Idx = CIdx->getZExtValue(); + if (!DemandedElts[Idx]) + return TLO.CombineTo(Op, Vec); + DemandedElts.clearBit(Idx); + + if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + + KnownUndef.clearBit(Idx); + if (Scl.isUndef()) + KnownUndef.setBit(Idx); + + KnownZero.clearBit(Idx); + if (isNullConstant(Scl) || isNullFPConstant(Scl)) + KnownZero.setBit(Idx); + break; + } + + APInt VecUndef, VecZero; + if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO, + Depth + 1)) + return true; + // Without knowing the insertion index we can't set KnownUndef/KnownZero. + break; + } + case ISD::VSELECT: { + APInt DemandedLHS(DemandedElts); + APInt DemandedRHS(DemandedElts); + + // TODO - add support for constant vselect masks. + + // See if we can simplify either vselect operand. + APInt UndefLHS, ZeroLHS; + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + + KnownUndef = UndefLHS & UndefRHS; + KnownZero = ZeroLHS & ZeroRHS; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); + + // Collect demanded elements from shuffle operands. + APInt DemandedLHS(NumElts, 0); + APInt DemandedRHS(NumElts, 0); + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); + if (M < (int)NumElts) + DemandedLHS.setBit(M); + else + DemandedRHS.setBit(M - NumElts); + } + + // See if we can simplify either shuffle operand. + APInt UndefLHS, ZeroLHS; + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + + // Simplify mask using undef elements from LHS/RHS.
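[Illustrative aside: the shuffle bookkeeping above can be modelled without any SelectionDAG machinery. A standalone sketch of the demanded-mask split, with invented mask values:

    #include <cstdint>
    #include <cstdio>

    int main() {
      const unsigned NumElts = 4;
      // -1 means undef; [0,NumElts) selects from LHS, the rest from RHS.
      int ShuffleMask[NumElts] = {0, -1, 5, 2};
      uint64_t DemandedElts = 0xD; // demand output elts 0, 2 and 3
      uint64_t DemandedLHS = 0, DemandedRHS = 0;
      for (unsigned i = 0; i != NumElts; ++i) {
        int M = ShuffleMask[i];
        if (M < 0 || !(DemandedElts & (1ull << i)))
          continue;
        if (M < (int)NumElts)
          DemandedLHS |= 1ull << M;
        else
          DemandedRHS |= 1ull << (M - NumElts);
      }
      // LHS must supply elts 0 and 2 (0x5); RHS must supply elt 1 (0x2).
      std::printf("LHS 0x%llx RHS 0x%llx\n", (unsigned long long)DemandedLHS,
                  (unsigned long long)DemandedRHS);
      return 0;
    }

Undef results propagated back from the operands can then turn mask entries into -1, which is what the next block does.]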
+ bool Updated = false; + bool IdentityLHS = true, IdentityRHS = true; + SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end()); + for (unsigned i = 0; i != NumElts; ++i) { + int &M = NewMask[i]; + if (M < 0) + continue; + if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) || + (M >= (int)NumElts && UndefRHS[M - NumElts])) { + Updated = true; + M = -1; + } + IdentityLHS &= (M < 0) || (M == (int)i); + IdentityRHS &= (M < 0) || ((M - NumElts) == i); + } + + // Update legal shuffle masks based on demanded elements if it won't reduce + // to Identity which can cause premature removal of the shuffle mask. + if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && + isShuffleMaskLegal(NewMask, VT)) + return TLO.CombineTo(Op, + TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), + Op.getOperand(1), NewMask)); + + // Propagate undef/zero elements from LHS/RHS. + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0) { + KnownUndef.setBit(i); + } else if (M < (int)NumElts) { + if (UndefLHS[M]) + KnownUndef.setBit(i); + if (ZeroLHS[M]) + KnownZero.setBit(i); + } else { + if (UndefRHS[M - NumElts]) + KnownUndef.setBit(i); + if (ZeroRHS[M - NumElts]) + KnownZero.setBit(i); + } + } + break; + } + case ISD::ADD: + case ISD::SUB: { + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, + SrcZero, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + KnownZero &= SrcZero; + KnownUndef &= SrcUndef; + break; + } + case ISD::TRUNCATE: + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + break; + default: { + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef, + KnownZero, TLO, Depth)) + return true; + break; + } } + assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero"); return false; } @@ -1316,6 +1699,18 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } +bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( + SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, + TargetLoweringOpt &TLO, unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyDemandedVectorElts if you don't know whether Op" + " is a target node!"); + return false; +} + // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. @@ -1353,16 +1748,6 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { llvm_unreachable("Invalid boolean contents"); } -SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT, - const SDLoc &DL) const { - unsigned ElementWidth = VT.getScalarSizeInBits(); - APInt TrueInt = - getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent - ? 
APInt(ElementWidth, 1) - : APInt::getAllOnesValue(ElementWidth); - return DAG.getConstant(TrueInt, DL, VT); -} - bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!N) return false; @@ -1466,6 +1851,89 @@ SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return SDValue(); } +/// There are multiple IR patterns that could be checking whether a certain +/// truncation of a signed number would be lossy or not. The pattern that is +/// best at the IR level may not lower optimally. Thus, we want to unfold it. +/// We are looking for the following pattern: (KeptBits is a constant) +/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) +/// KeptBits won't be bitwidth(x); that would be constant-folded to true/false. +/// KeptBits also can't be 1; that would have been folded to %x dstcond 0 +/// We will unfold it into the natural trunc+sext pattern: +/// ((%x << C) a>> C) dstcond %x +/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x) +SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( + EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const { + // We must be comparing with a constant. + ConstantSDNode *C1; + if (!(C1 = dyn_cast<ConstantSDNode>(N1))) + return SDValue(); + + // N0 should be: add %x, (1 << (KeptBits-1)) + if (N0->getOpcode() != ISD::ADD) + return SDValue(); + + // And we must be 'add'ing a constant. + ConstantSDNode *C01; + if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1)))) + return SDValue(); + + SDValue X = N0->getOperand(0); + EVT XVT = X.getValueType(); + + // Validate constants ... + + APInt I1 = C1->getAPIntValue(); + + ISD::CondCode NewCond; + if (Cond == ISD::CondCode::SETULT) { + NewCond = ISD::CondCode::SETEQ; + } else if (Cond == ISD::CondCode::SETULE) { + NewCond = ISD::CondCode::SETEQ; + // But need to 'canonicalize' the constant. + I1 += 1; + } else if (Cond == ISD::CondCode::SETUGT) { + NewCond = ISD::CondCode::SETNE; + // But need to 'canonicalize' the constant. + I1 += 1; + } else if (Cond == ISD::CondCode::SETUGE) { + NewCond = ISD::CondCode::SETNE; + } else + return SDValue(); + + const APInt &I01 = C01->getAPIntValue(); + // Both of them must be power-of-two, and the constant from setcc is bigger. + if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2())) + return SDValue(); + + // They are power-of-two, so which bit is set? + const unsigned KeptBits = I1.logBase2(); + const unsigned KeptBitsMinusOne = I01.logBase2(); + + // Magic! + if (KeptBits != (KeptBitsMinusOne + 1)) + return SDValue(); + assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable"); + + // We don't want to do this in every single case. + SelectionDAG &DAG = DCI.DAG; + if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck( + XVT, KeptBits)) + return SDValue(); + + const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits; + assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable"); + + // Unfold into: ((%x << C) a>> C) cond %x + // Where 'cond' will be either 'eq' or 'ne'. + SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT); + SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt); + SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond); + + return T2; +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue.
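[Illustrative aside: a scalar model of the unfold implemented above, for i32 with KeptBits = 8, so the 'add' constant is 1 << 7 and the setcc constant is 1 << 8; the test range is an arbitrary choice for the example.

    #include <cassert>
    #include <cstdint>

    // IR-friendly form:  (x + (1 << (KeptBits-1))) u< (1 << KeptBits)
    // Unfolded form:     ((x << C) a>> C) == x, where C = 32 - KeptBits.
    // Both are true exactly when truncating x to KeptBits bits is lossless.
    static bool addForm(int32_t x) {
      return (uint32_t)(x + 0x80) < 0x100u; // KeptBits = 8
    }
    static bool shiftForm(int32_t x) {
      const unsigned C = 32 - 8;
      int32_t t = (int32_t)((uint32_t)x << C); // shift via unsigned, avoids UB
      return (t >> C) == x;                    // arithmetic right shift
    }
    int main() {
      for (int64_t x = -70000; x <= 70000; ++x)
        assert(addForm((int32_t)x) == shiftForm((int32_t)x));
      return 0;
    }

The add form biases x so that the in-range values [-128, 127] become the contiguous unsigned range [0, 255], which a single unsigned compare can test; the shift form is the trunc+sext round-trip spelled with shifts.]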
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -1473,25 +1941,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAGCombinerInfo &DCI, const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; + EVT OpVT = N0.getValueType(); // These setcc operations always fold. switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT); + case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT); case ISD::SETTRUE: - case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = - getBooleanContents(N0->getValueType(0)); - return DAG.getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, - VT); - } + case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT); } // Ensure that the constant occurs on the RHS and fold constant comparisons. + // TODO: Handle non-splat vector constants. All undef causes trouble. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); - if (isa<ConstantSDNode>(N0.getNode()) && + if (isConstOrConstSplat(N0) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -1737,7 +2201,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = N0.getOperand(0).getValueType(); if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && - getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { + isCondCodeLegal(Cond, newVT.getSimpleVT()))) { EVT NewSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); @@ -1867,8 +2331,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + if (SDValue V = + optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) + return V; + } + + // These simplifications apply to splat vectors as well. + // TODO: Handle more splat vector cases. + if (auto *N1C = isConstOrConstSplat(N1)) { + const APInt &C1 = N1C->getAPIntValue(); + APInt MinVal, MaxVal; - unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits(); if (ISD::isSignedIntSetCC(Cond)) { MinVal = APInt::getSignedMinValue(OperandBitSize); MaxVal = APInt::getSignedMaxValue(OperandBitSize); @@ -1881,84 +2355,105 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { // X >= MIN --> true if (C1 == MinVal) - return DAG.getConstant(1, dl, VT); - - // X >= C0 --> X > (C0 - 1) - APInt C = C1 - 1; - ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; - if ((DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, dl, N1.getValueType()), - NewCC); + return DAG.getBoolConstant(true, dl, VT, OpVT); + + if (!VT.isVector()) { // TODO: Support this for vectors. + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? 
ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { // X <= MAX --> true if (C1 == MaxVal) - return DAG.getConstant(1, dl, VT); + return DAG.getBoolConstant(true, dl, VT, OpVT); // X <= C0 --> X < (C0 + 1) - APInt C = C1 + 1; - ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; - if ((DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, dl, N1.getValueType()), - NewCC); - } - } - - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, dl, VT); // X < MIN --> false - if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, dl, VT); // X >= MIN --> true - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, dl, VT); // X > MAX --> false - if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, dl, VT); // X <= MAX --> true - - // Canonicalize setgt X, Min --> setne X, Min - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - // Canonicalize setlt X, Max --> setne X, Max - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - - // If we have setult X, 1, turn it into seteq X, 0 - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, dl, N0.getValueType()), - ISD::SETEQ); - // If we have setugt X, Max-1, turn it into seteq X, Max - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, dl, N0.getValueType()), - ISD::SETEQ); + if (!VT.isVector()) { // TODO: Support this for vectors. + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } + } + } - // If we have "setcc X, C0", check to see if we can shrink the immediate - // by changing cc. + if (Cond == ISD::SETLT || Cond == ISD::SETULT) { + if (C1 == MinVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false + + // TODO: Support this for vectors after legalize ops. 
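[Illustrative aside: the X >= C --> X > (C-1) and X <= C --> X < (C+1) rewrites above are easy to sanity-check exhaustively for a small unsigned type; a throwaway C++ check, with the signed variants working the same way for SETGT/SETLT.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned c = 0; c <= 255; ++c)
        for (unsigned x = 0; x <= 255; ++x) {
          uint8_t X = (uint8_t)x, C = (uint8_t)c;
          if (C != 0)   // X >= C  <=>  X > C-1   (invalid only for C == MIN)
            assert((X >= C) == (X > (uint8_t)(C - 1)));
          if (C != 255) // X <= C  <=>  X < C+1   (invalid only for C == MAX)
            assert((X <= C) == (X < (uint8_t)(C + 1)));
        }
      // The excluded C == MIN / C == MAX cases fold straight to true, as the
      // code above does before attempting the constant adjustment.
      return 0;
    }

]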
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // Canonicalize setlt X, Max --> setne X, Max + if (C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if (C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } - // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, dl, N1.getValueType()), - ISD::SETLT); + if (Cond == ISD::SETGT || Cond == ISD::SETUGT) { + if (C1 == MaxVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false + + // TODO: Support this for vectors after legalize ops. + if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // Canonicalize setgt X, Min --> setne X, Min + if (C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setugt X, Max-1, turn it into seteq X, Max + if (C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } - // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + // TODO: Support this for vectors after legalize ops. + if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, dl, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } } + } + + // Back to non-vector simplifications. + // TODO: Can we do these for vector splats? + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && @@ -1967,9 +2462,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() - ? getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. if (AndRHS->getAPIntValue().isPowerOf2()) { @@ -2001,9 +2495,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); auto &DL = DAG.getDataLayout(); - EVT ShiftTy = DCI.isBeforeLegalize() - ? 
getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), DAG.getConstant(ShiftBits, dl, @@ -2033,9 +2526,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (ShiftBits && NewC.getMinSignedBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue())) { auto &DL = DAG.getDataLayout(); - EVT ShiftTy = DCI.isBeforeLegalize() - ? getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); @@ -2058,9 +2550,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (ISD::getUnorderedFlavor(Cond)) { default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. - return DAG.getConstant(0, dl, VT); + return DAG.getBoolConstant(false, dl, VT, OpVT); case 1: // Known true. - return DAG.getConstant(1, dl, VT); + return DAG.getBoolConstant(true, dl, VT, OpVT); case 2: // Undefined. return DAG.getUNDEF(VT); } @@ -2124,31 +2616,24 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. - uint64_t EqVal = 0; - switch (getBooleanContents(N0.getValueType())) { - case UndefinedBooleanContent: - case ZeroOrOneBooleanContent: - EqVal = ISD::isTrueWhenEqual(Cond); - break; - case ZeroOrNegativeOneBooleanContent: - EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0; - break; - } + + bool EqTrue = ISD::isTrueWhenEqual(Cond); // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) { - return DAG.getConstant(EqVal, dl, VT); - } + if (N0.getValueType().isInteger()) + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); + unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(EqVal, dl, VT); - if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(EqVal, dl, VT); + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); + if (UOF == unsigned(EqTrue)) + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? 
ISD::SETO : ISD::SETUO; - if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || - getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal)) + if (NewCond != Cond && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCond, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } @@ -2237,7 +2722,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue SH = DAG.getNode( ISD::SHL, dl, N1.getValueType(), N1, DAG.getConstant(1, dl, - getShiftAmountTy(N1.getValueType(), DL))); + getShiftAmountTy(N1.getValueType(), DL, + !DCI.isBeforeLegalize()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -2262,7 +2748,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode( ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL))); + DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -2276,50 +2763,52 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Fold away ALL boolean setcc's. SDValue Temp; - if (N0.getValueType() == MVT::i1 && foldBooleans) { + if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { + EVT OpVT = N0.getValueType(); switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: // X == Y -> ~(X^Y) - Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); - N0 = DAG.getNOT(dl, Temp, MVT::i1); + Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); + N0 = DAG.getNOT(dl, Temp, OpVT); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETNE: // X != Y --> (X^Y) - N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); break; case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y - Temp = DAG.getNOT(dl, N0, MVT::i1); - N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + Temp = DAG.getNOT(dl, N0, OpVT); + N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X - Temp = DAG.getNOT(dl, N1, MVT::i1); - N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + Temp = DAG.getNOT(dl, N1, OpVT); + N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y - Temp = DAG.getNOT(dl, N0, MVT::i1); - N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + Temp = DAG.getNOT(dl, N0, OpVT); + N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X - Temp = DAG.getNOT(dl, N1, MVT::i1); - N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + Temp = DAG.getNOT(dl, N1, OpVT); + N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp); break; } - if (VT != MVT::i1) { + if (VT.getScalarType() != MVT::i1) { if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(N0.getNode()); // FIXME: If running after legalize, we 
probably can't do this. - N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT)); + N0 = DAG.getNode(ExtendCode, dl, VT, N0); } return N0; } @@ -2928,7 +3417,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } } -/// \brief Given an exact SDIV by a constant, create a multiplication +/// Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, const SDLoc &dl, SelectionDAG &DAG, @@ -2970,7 +3459,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, return SDValue(); } -/// \brief Given an ISD::SDIV node expressing a divide by constant, +/// Given an ISD::SDIV node expressing a divide by a constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". @@ -3034,7 +3523,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, return DAG.getNode(ISD::ADD, dl, VT, Q, T); } -/// \brief Given an ISD::UDIV node expressing a divide by constant, +/// Given an ISD::UDIV node expressing a divide by a constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". @@ -3413,9 +3902,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, return DAG.getMergeValues({ Value, NewChain }, SL); } -// FIXME: This relies on each element having a byte size, otherwise the stride -// is 0 and just overwrites the same location. ExpandStore currently expects -// this broken behavior. SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const { SDLoc SL(ST); @@ -3432,11 +3918,43 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // The type of data as saved in memory. EVT MemSclVT = StVT.getScalarType(); - // Store Stride in bytes - unsigned Stride = MemSclVT.getSizeInBits() / 8; EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); unsigned NumElem = StVT.getVectorNumElements(); + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various pieces of code depend on it, e.g. in + // the handling of a bitcast of a vector type to int, which may be done with + // a vector store followed by an integer load. A vector that does not have + // elements that are byte-sized must therefore be stored as an integer + // built out of the extracted vector elements. + if (!MemSclVT.isByteSized()) { + unsigned NumBits = StVT.getSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + + SDValue CurrVal = DAG.getConstant(0, SL, IntVT); + + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, + DAG.getConstant(Idx, SL, IdxVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt); + SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc); + unsigned ShiftIntoIdx = + (DAG.getDataLayout().isBigEndian() ?
(NumElem - 1) - Idx : Idx); + SDValue ShiftAmount = + DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT); + SDValue ShiftedElt = + DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount); + CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt); + } + + return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), + ST->getAlignment(), ST->getMemOperand()->getFlags(), + ST->getAAInfo()); + } + + // Store Stride in bytes + unsigned Stride = MemSclVT.getSizeInBits() / 8; + assert(Stride && "Zero stride!"); // Extract each of the elements from the original vector and save them into // memory individually. SmallVector<SDValue, 8> Stores; @@ -3475,6 +3993,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) { // Scalarize the load and let the individual components be handled. SDValue Scalarized = scalarizeVectorLoad(LD, DAG); + if (Scalarized->getOpcode() == ISD::MERGE_VALUES) + return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); }
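[Illustrative aside: the padding-free store path added above can be modelled in isolation - pack sub-byte elements into one integer via trunc+zext, shift by an endian-dependent element index, and OR into the accumulator. A sketch; eight i1 elements and little-endian layout are assumptions of the example.

    #include <cassert>
    #include <cstdint>

    // Pack NumElem 1-bit vector elements into a single integer, mirroring
    // the trunc/zext/shl/or loop used for non-byte-sized element stores.
    int main() {
      const unsigned NumElem = 8, EltBits = 1;
      const bool BigEndian = false; // normally decided by the DataLayout
      const uint8_t Elts[NumElem] = {1, 0, 1, 1, 0, 0, 1, 0};
      uint32_t CurrVal = 0;
      for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
        uint32_t ExtElt = Elts[Idx] & ((1u << EltBits) - 1); // trunc + zext
        unsigned ShiftIntoIdx = BigEndian ? (NumElem - 1) - Idx : Idx;
        CurrVal |= ExtElt << (ShiftIntoIdx * EltBits);
      }
      assert(CurrVal == 0x4D); // elt 0 lands in bit 0 on little-endian
      return 0;
    }

Storing the packed integer once preserves the no-padding memory layout that a later bitcast-to-int load expects, which a per-element store with a zero byte stride could not.]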