Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
27 files changed, 6683 insertions, 4522 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e5bc08b9280ab..f14b3dba4f318 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -30,6 +30,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -124,17 +125,29 @@ static cl::opt<unsigned> StoreMergeDependenceLimit( cl::desc("Limit the number of times for the same StoreNode and RootNode " "to bail out in store merging dependence check")); +static cl::opt<bool> EnableReduceLoadOpStoreWidth( + "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), + cl::desc("DAG cominber enable reducing the width of load/op/store " + "sequence")); + +static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( + "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), + cl::desc("DAG cominber enable load/<replace bytes>/store with " + "a narrower store")); + namespace { class DAGCombiner { SelectionDAG &DAG; const TargetLowering &TLI; + const SelectionDAGTargetInfo *STI; CombineLevel Level; CodeGenOpt::Level OptLevel; bool LegalDAG = false; bool LegalOperations = false; bool LegalTypes = false; bool ForCodeSize; + bool DisableGenericCombines; /// Worklist of all of the nodes that need to be simplified. /// @@ -222,9 +235,11 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), AA(AA) { + : DAG(D), TLI(D.getTargetLoweringInfo()), + STI(D.getSubtarget().getSelectionDAGInfo()), + Level(BeforeLegalizeTypes), OptLevel(OL), AA(AA) { ForCodeSize = DAG.shouldOptForSize(); + DisableGenericCombines = STI && STI->disableGenericCombines(OptLevel); MaximumLegalStoreInBits = 0; // We use the minimum store size here, since that's all we can guarantee @@ -307,23 +322,34 @@ namespace { } bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) { - EVT VT = Op.getValueType(); - unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1; - APInt DemandedElts = APInt::getAllOnesValue(NumElts); - return SimplifyDemandedBits(Op, DemandedBits, DemandedElts); + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + KnownBits Known; + if (!TLI.SimplifyDemandedBits(Op, DemandedBits, Known, TLO, 0, false)) + return false; + + // Revisit the node. + AddToWorklist(Op.getNode()); + + CommitTargetLoweringOpt(TLO); + return true; } /// Check the specified vector node value to see if it can be simplified or /// if things it uses can be simplified as it only uses some of the /// elements. If so, return true. bool SimplifyDemandedVectorElts(SDValue Op) { + // TODO: For now just pretend it cannot be simplified. 
+ if (Op.getValueType().isScalableVector()) + return false; + unsigned NumElts = Op.getValueType().getVectorNumElements(); APInt DemandedElts = APInt::getAllOnesValue(NumElts); return SimplifyDemandedVectorElts(Op, DemandedElts); } bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, - const APInt &DemandedElts); + const APInt &DemandedElts, + bool AssumeSingleUse = false); bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, bool AssumeSingleUse = false); @@ -429,11 +455,13 @@ namespace { SDValue visitZERO_EXTEND(SDNode *N); SDValue visitANY_EXTEND(SDNode *N); SDValue visitAssertExt(SDNode *N); + SDValue visitAssertAlign(SDNode *N); SDValue visitSIGN_EXTEND_INREG(SDNode *N); SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); SDValue visitTRUNCATE(SDNode *N); SDValue visitBITCAST(SDNode *N); + SDValue visitFREEZE(SDNode *N); SDValue visitBUILD_PAIR(SDNode *N); SDValue visitFADD(SDNode *N); SDValue visitFSUB(SDNode *N); @@ -522,9 +550,8 @@ namespace { SDValue rebuildSetCC(SDValue N); bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) const; + SDValue &CC, bool MatchStrict = false) const; bool isOneUseSetCC(SDValue N) const; - bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); @@ -553,6 +580,10 @@ namespace { SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); + SDValue MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, SDValue Neg, + SDValue InnerPos, SDValue InnerNeg, + unsigned PosOpcode, unsigned NegOpcode, + const SDLoc &DL); SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); SDValue MatchLoadCombine(SDNode *N); SDValue MatchStoreCombine(StoreSDNode *N); @@ -562,6 +593,7 @@ namespace { SDValue TransformFPLoadStorePair(SDNode *N); SDValue convertBuildVecZextToZext(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecTruncToBitCast(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, @@ -606,6 +638,19 @@ namespace { : MemNode(N), OffsetFromBase(Offset) {} }; + // Classify the origin of a stored value. + enum class StoreSource { Unknown, Constant, Extract, Load }; + StoreSource getStoreSource(SDValue StoreVal) { + if (isa<ConstantSDNode>(StoreVal) || isa<ConstantFPSDNode>(StoreVal)) + return StoreSource::Constant; + if (StoreVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + StoreVal.getOpcode() == ISD::EXTRACT_SUBVECTOR) + return StoreSource::Extract; + if (isa<LoadSDNode>(StoreVal)) + return StoreSource::Load; + return StoreSource::Unknown; + } + /// This is a helper function for visitMUL to check the profitability /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). /// MulNode is the original multiply, AddNode is (add x, c1), @@ -633,43 +678,66 @@ namespace { /// can be combined into narrow loads. bool BackwardsPropagateMask(SDNode *N); - /// Helper function for MergeConsecutiveStores which merges the - /// component store chains. + /// Helper function for mergeConsecutiveStores which merges the component + /// store chains. SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores); - /// This is a helper function for MergeConsecutiveStores. 
When the - /// source elements of the consecutive stores are all constants or - /// all extracted vector elements, try to merge them into one - /// larger store introducing bitcasts if necessary. \return True - /// if a merged store was created. - bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + /// This is a helper function for mergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store introducing + /// bitcasts if necessary. \return True if a merged store was created. + bool mergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc); - /// This is a helper function for MergeConsecutiveStores. Stores - /// that potentially may be merged with St are placed in - /// StoreNodes. RootNode is a chain predecessor to all store - /// candidates. + /// This is a helper function for mergeConsecutiveStores. Stores that + /// potentially may be merged with St are placed in StoreNodes. RootNode is + /// a chain predecessor to all store candidates. void getStoreMergeCandidates(StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes, SDNode *&Root); - /// Helper function for MergeConsecutiveStores. Checks if - /// candidate stores have indirect dependency through their - /// operands. RootNode is the predecessor to all stores calculated - /// by getStoreMergeCandidates and is used to prune the dependency check. - /// \return True if safe to merge. + /// Helper function for mergeConsecutiveStores. Checks if candidate stores + /// have indirect dependency through their operands. RootNode is the + /// predecessor to all stores calculated by getStoreMergeCandidates and is + /// used to prune the dependency check. \return True if safe to merge. bool checkMergeStoreCandidatesForDependencies( SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores, SDNode *RootNode); + /// This is a helper function for mergeConsecutiveStores. Given a list of + /// store candidates, find the first N that are consecutive in memory. + /// Returns 0 if there are not at least 2 consecutive stores to try merging. + unsigned getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes, + int64_t ElementSizeBytes) const; + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of constant values. + bool tryStoreMergeOfConstants(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumConsecutiveStores, + EVT MemVT, SDNode *Root, bool AllowVectors); + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of extracted vector elements. + /// When extracting multiple vector elements, try to store them in one + /// vector store rather than a sequence of scalar stores. + bool tryStoreMergeOfExtracts(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *Root); + + /// This is a helper function for mergeConsecutiveStores. It is used for + /// store chains that are composed entirely of loaded values. + bool tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *Root, bool AllowVectors, + bool IsNonTemporalStore, bool IsNonTemporalLoad); + /// Merge consecutive store operations into a wide store. /// This optimization uses wide integers or vectors when possible. 
- /// \return number of stores that were merged into a merged store (the - /// affected nodes are stored as a prefix in \p StoreNodes). - bool MergeConsecutiveStores(StoreSDNode *St); + /// \return true if stores were merged. + bool mergeConsecutiveStores(StoreSDNode *St); /// Try to transform a truncation where C is a constant: /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) @@ -814,7 +882,7 @@ static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) { // the appropriate nodes based on the type of node we are checking. This // simplifies life a bit for the callers. bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, - SDValue &CC) const { + SDValue &CC, bool MatchStrict) const { if (N.getOpcode() == ISD::SETCC) { LHS = N.getOperand(0); RHS = N.getOperand(1); @@ -822,6 +890,15 @@ bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, return true; } + if (MatchStrict && + (N.getOpcode() == ISD::STRICT_FSETCC || + N.getOpcode() == ISD::STRICT_FSETCCS)) { + LHS = N.getOperand(1); + RHS = N.getOperand(2); + CC = N.getOperand(3); + return true; + } + if (N.getOpcode() != ISD::SELECT_CC || !TLI.isConstTrueVal(N.getOperand(2).getNode()) || !TLI.isConstFalseVal(N.getOperand(3).getNode())) @@ -886,6 +963,13 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) { ISD::isBuildVectorOfConstantFPSDNodes(V.getNode()); } +// Determine if this an indexed load with an opaque target constant index. +static bool canSplitIdx(LoadSDNode *LD) { + return MaySplitLoadIndex && + (LD->getOperand(2).getOpcode() != ISD::TargetConstant || + !cast<ConstantSDNode>(LD->getOperand(2))->isOpaque()); +} + bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc, const SDLoc &DL, SDValue N0, @@ -951,14 +1035,11 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (N0.getOpcode() != Opc) return SDValue(); - // Don't reassociate reductions. - if (N0->getFlags().hasVectorReduction()) - return SDValue(); - - if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2)) + if (SDValue OpNode = + DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1})) return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); return SDValue(); } @@ -978,9 +1059,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags) { assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative."); - // Don't reassociate reductions. - if (Flags.hasVectorReduction()) - return SDValue(); // Floating-point reassociation is not allowed without loose FP math. if (N0.getValueType().isFloatingPoint() || @@ -1029,6 +1107,12 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, void DAGCombiner:: CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + // Replace the old value with the new one. + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + // Replace all uses. 
If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorklistRemover DeadNodes(*this); @@ -1047,21 +1131,17 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { /// Check the specified integer node value to see if it can be simplified or if /// things it uses can be simplified by bit propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, - const APInt &DemandedElts) { + const APInt &DemandedElts, + bool AssumeSingleUse) { TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); KnownBits Known; - if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO)) + if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, 0, + AssumeSingleUse)) return false; // Revisit the node. AddToWorklist(Op.getNode()); - // Replace the old value with the new one. - ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); - dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); - dbgs() << '\n'); - CommitTargetLoweringOpt(TLO); return true; } @@ -1081,12 +1161,6 @@ bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, // Revisit the node. AddToWorklist(Op.getNode()); - // Replace the old value with the new one. - ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG); - dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG); - dbgs() << '\n'); - CommitTargetLoweringOpt(TLO); return true; } @@ -1210,8 +1284,11 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { SDValue RV = DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1)); - // We are always replacing N0/N1's use in N and only need - // additional replacements if there are additional uses. + // We are always replacing N0/N1's use in N and only need additional + // replacements if there are additional uses. + // Note: We are checking uses of the *nodes* (SDNode) rather than values + // (SDValue) here because the node may reference multiple values + // (for example, the chain value of a load node). Replace0 &= !N0->hasOneUse(); Replace1 &= (N0 != N1) && !N1->hasOneUse(); @@ -1561,6 +1638,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::ANY_EXTEND: return visitANY_EXTEND(N); case ISD::AssertSext: case ISD::AssertZext: return visitAssertExt(N); + case ISD::AssertAlign: return visitAssertAlign(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); @@ -1610,6 +1688,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::LIFETIME_END: return visitLIFETIME_END(N); case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); + case ISD::FREEZE: return visitFREEZE(N); case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -1628,7 +1707,9 @@ SDValue DAGCombiner::visit(SDNode *N) { } SDValue DAGCombiner::combine(SDNode *N) { - SDValue RV = visit(N); + SDValue RV; + if (!DisableGenericCombines) + RV = visit(N); // If nothing happened, try a target-specific DAG combine. if (!RV.getNode()) { @@ -2046,12 +2127,11 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { // We need a constant operand for the add/sub, and the other operand is a // logical shift right: add (srl), C or sub C, (srl). - // TODO - support non-uniform vector amounts. 
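For illustration only (not part of the patch), the sign-bit fold described above, written out for scalar i32 in plain C++ so the identity can be checked directly:

#include <cassert>
#include <cstdint>

// foldAddSubOfSignBit, add flavor:
//   C + ((~x) >> 31)              // logical shift of a 'not' value
// is rewritten to
//   ((int32_t)x >> 31) + (C + 1)  // arithmetic shift of x, constant bumped by one
uint32_t beforeFold(uint32_t x, uint32_t C) { return C + (~x >> 31); }
uint32_t afterFold(uint32_t x, uint32_t C) {
  return (uint32_t)((int32_t)x >> 31) + (C + 1);
}

int main() {
  for (uint32_t x : {0u, 1u, 0x7fffffffu, 0x80000000u, 0xffffffffu})
    assert(beforeFold(x, 5) == afterFold(x, 5));
}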
bool IsAdd = N->getOpcode() == ISD::ADD; SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0); SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1); - ConstantSDNode *C = isConstOrConstSplat(ConstantOp); - if (!C || ShiftOp.getOpcode() != ISD::SRL) + if (!DAG.isConstantIntBuildVectorOrConstantInt(ConstantOp) || + ShiftOp.getOpcode() != ISD::SRL) return SDValue(); // The shift must be of a 'not' value. @@ -2072,8 +2152,11 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { SDLoc DL(N); auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL; SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt); - APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1; - return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT)); + if (SDValue NewC = + DAG.FoldConstantArithmetic(IsAdd ? ISD::ADD : ISD::SUB, DL, VT, + {ConstantOp, DAG.getConstant(1, DL, VT)})) + return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC); + return SDValue(); } /// Try to fold a node that behaves like an ADD (note that N isn't necessarily @@ -2109,8 +2192,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::ADD, DL, VT, N1, N0); // fold (add c1, c2) -> c1+c2 - return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(), - N1.getNode()); + return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0, N1}); } // fold (add x, 0) -> x @@ -2121,8 +2203,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold ((A-c1)+c2) -> (A+(c2-c1)) if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { - SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(), - N0.getOperand(1).getNode()); + SDValue Sub = + DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N1, N0.getOperand(1)}); assert(Sub && "Constant folding failed"); return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub); } @@ -2130,8 +2212,8 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold ((c1-A)+c2) -> (c1+c2)-A if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) { - SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(), - N0.getOperand(0).getNode()); + SDValue Add = + DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N0.getOperand(0)}); assert(Add && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1)); } @@ -2152,13 +2234,14 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { } } - // Undo the add -> or combine to merge constant offsets from a frame index. + // Fold (add (or x, c0), c1) -> (add x, (c0 + c1)) if (or x, c0) is + // equivalent to (add x, c0). if (N0.getOpcode() == ISD::OR && - isa<FrameIndexSDNode>(N0.getOperand(0)) && - isa<ConstantSDNode>(N0.getOperand(1)) && + isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true) && DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) { - SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1)); - return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); + if (SDValue Add0 = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, + {N1, N0.getOperand(1)})) + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0); } } @@ -2317,6 +2400,23 @@ SDValue DAGCombiner::visitADD(SDNode *N) { DAG.haveNoCommonBitsSet(N0, N1)) return DAG.getNode(ISD::OR, DL, VT, N0, N1); + // Fold (add (vscale * C0), (vscale * C1)) to (vscale * (C0 + C1)). 
+ if (N0.getOpcode() == ISD::VSCALE && N1.getOpcode() == ISD::VSCALE) { + APInt C0 = N0->getConstantOperandAPInt(0); + APInt C1 = N1->getConstantOperandAPInt(0); + return DAG.getVScale(DL, VT, C0 + C1); + } + + // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2) + if ((N0.getOpcode() == ISD::ADD) && + (N0.getOperand(1).getOpcode() == ISD::VSCALE) && + (N1.getOpcode() == ISD::VSCALE)) { + auto VS0 = N0.getOperand(1)->getConstantOperandAPInt(0); + auto VS1 = N1->getConstantOperandAPInt(0); + auto VS = DAG.getVScale(DL, VT, VS0 + VS1); + return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), VS); + } + return SDValue(); } @@ -2347,8 +2447,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(Opcode, DL, VT, N1, N0); // fold (add_sat c1, c2) -> c3 - return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(), - N1.getNode()); + return DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1}); } // fold (add_sat x, 0) -> x @@ -2968,12 +3067,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations); - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - DAG.isConstantIntBuildVectorOrConstantInt(N1)) { - // fold (sub c1, c2) -> c1-c2 - return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), - N1.getNode()); - } + + // fold (sub c1, c2) -> c3 + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N1})) + return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -3040,8 +3137,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::ADD && isConstantOrConstantVector(N1, /* NoOpaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic( - ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode()); + SDValue NewC = + DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0.getOperand(1), N1}); assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC); } @@ -3051,8 +3148,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { SDValue N11 = N1.getOperand(1); if (isConstantOrConstantVector(N0, /* NoOpaques */ true) && isConstantOrConstantVector(N11, /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(), - N11.getNode()); + SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, {N0, N11}); assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0)); } @@ -3062,8 +3158,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N1, /* NoOpaques */ true) && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic( - ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode()); + SDValue NewC = + DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N0.getOperand(1), N1}); assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC); } @@ -3072,8 +3168,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N0.getOpcode() == ISD::SUB && isConstantOrConstantVector(N1, /* NoOpaques */ true) && isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) { - SDValue NewC = DAG.FoldConstantArithmetic( - ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode()); + SDValue NewC = + DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, 
{N0.getOperand(0), N1}); assert(NewC && "Constant folding failed"); return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1)); } @@ -3244,6 +3340,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) + if (N1.getOpcode() == ISD::VSCALE) { + APInt IntVal = N1.getConstantOperandAPInt(0); + return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal)); + } + // Prefer an add for more folding potential and possibly better codegen: // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1) if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) { @@ -3294,12 +3396,9 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { if (N0 == N1) return DAG.getConstant(0, DL, VT); - if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - DAG.isConstantIntBuildVectorOrConstantInt(N1)) { - // fold (sub_sat c1, c2) -> c3 - return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(), - N1.getNode()); - } + // fold (sub_sat c1, c2) -> c3 + if (SDValue C = DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, {N0, N1})) + return C; // fold (sub_sat x, 0) -> x if (isNullConstant(N1)) @@ -3435,30 +3534,20 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N0.isUndef() || N1.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); - bool N0IsConst = false; bool N1IsConst = false; bool N1IsOpaqueConst = false; - bool N0IsOpaqueConst = false; - APInt ConstValue0, ConstValue1; + APInt ConstValue1; + // fold vector ops if (VT.isVector()) { if (SDValue FoldedVOp = SimplifyVBinOp(N)) return FoldedVOp; - N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); - assert((!N0IsConst || - ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) && - "Splat APInt should be element width"); assert((!N1IsConst || ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) && "Splat APInt should be element width"); } else { - N0IsConst = isa<ConstantSDNode>(N0); - if (N0IsConst) { - ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); - N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); - } N1IsConst = isa<ConstantSDNode>(N1); if (N1IsConst) { ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); @@ -3467,17 +3556,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul c1, c2) -> c1*c2 - if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) - return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, - N0.getNode(), N1.getNode()); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, {N0, N1})) + return C; // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); + // fold (mul x, 0) -> 0 if (N1IsConst && ConstValue1.isNullValue()) return N1; + // fold (mul x, 1) -> x if (N1IsConst && ConstValue1.isOneValue()) return N0; @@ -3491,6 +3581,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); } + // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && DAG.isKnownToBeAPowerOfTwo(N1) && @@ -3501,6 +3592,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); } + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1IsConst && !N1IsOpaqueConst && 
(-ConstValue1).isPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); @@ -3589,6 +3681,14 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); + // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). + if (N0.getOpcode() == ISD::VSCALE) + if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) { + APInt C0 = N0.getConstantOperandAPInt(0); + APInt C1 = NC1->getAPIntValue(); + return DAG.getVScale(SDLoc(N), VT, C0 * C1); + } + // reassociate mul if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags())) return RMUL; @@ -3746,13 +3846,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDLoc DL(N); // fold (sdiv c1, c2) -> c1/c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) + return C; + // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); + // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) if (N1C && N1C->getAPIntValue().isMinSignedValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), @@ -3890,12 +3991,10 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDLoc DL(N); // fold (udiv c1, c2) -> c1/c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, - N0C, N1C)) - return Folded; + if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) + return C; + // fold (udiv X, -1) -> select(X == -1, 1, 0) if (N1C && N1C->getAPIntValue().isAllOnesValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), @@ -3988,11 +4087,10 @@ SDValue DAGCombiner::visitREM(SDNode *N) { SDLoc DL(N); // fold (rem c1, c2) -> c1%c2 - ConstantSDNode *N0C = isConstOrConstSplat(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C) - if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) - return Folded; + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) + return C; + // fold (urem X, -1) -> select(X == -1, 0, x) if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), @@ -4088,7 +4186,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. - if (VT.isSimple() && !VT.isVector()) { + if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); @@ -4144,7 +4242,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. 
- if (VT.isSimple() && !VT.isVector()) { + if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); @@ -4317,6 +4415,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + unsigned Opcode = N->getOpcode(); // fold vector ops if (VT.isVector()) @@ -4324,19 +4423,16 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { return FoldedVOp; // fold operation with constant operands. - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1})) + return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && - !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. - unsigned Opcode = N->getOpcode(); if (!TLI.isOperationLegal(Opcode, VT) && (N0.isUndef() || DAG.SignBitIsZero(N0)) && (N1.isUndef() || DAG.SignBitIsZero(N1))) { @@ -4825,11 +4921,16 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST, return false; // Ensure that this isn't going to produce an unsupported memory access. - if (ShAmt && - !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, - LDST->getAddressSpace(), ShAmt / 8, - LDST->getMemOperand()->getFlags())) - return false; + if (ShAmt) { + assert(ShAmt % 8 == 0 && "ShAmt is byte offset"); + const unsigned ByteShAmt = ShAmt / 8; + const Align LDSTAlign = LDST->getAlign(); + const Align NarrowAlign = commonAlignment(LDSTAlign, ByteShAmt); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, + LDST->getAddressSpace(), NarrowAlign, + LDST->getMemOperand()->getFlags())) + return false; + } // It's not possible to generate a constant of extended or untyped type. 
EVT PtrType = LDST->getBasePtr().getValueType(); @@ -5174,17 +5275,19 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } // fold (and c1, c2) -> c1&c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, {N0, N1})) + return C; + // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); + // fold (and x, -1) -> x if (isAllOnesConstant(N1)) return N0; + // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), @@ -5654,6 +5757,48 @@ static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) { return false; } +// Match this pattern: +// (or (and (shl (A, 8)), 0xff00ff00), (and (srl (A, 8)), 0x00ff00ff)) +// And rewrite this to: +// (rotr (bswap A), 16) +static SDValue matchBSwapHWordOrAndAnd(const TargetLowering &TLI, + SelectionDAG &DAG, SDNode *N, SDValue N0, + SDValue N1, EVT VT, EVT ShiftAmountTy) { + assert(N->getOpcode() == ISD::OR && VT == MVT::i32 && + "MatchBSwapHWordOrAndAnd: expecting i32"); + if (!TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND) + return SDValue(); + // TODO: this is too restrictive; lifting this restriction requires more tests + if (!N0->hasOneUse() || !N1->hasOneUse()) + return SDValue(); + ConstantSDNode *Mask0 = isConstOrConstSplat(N0.getOperand(1)); + ConstantSDNode *Mask1 = isConstOrConstSplat(N1.getOperand(1)); + if (!Mask0 || !Mask1) + return SDValue(); + if (Mask0->getAPIntValue() != 0xff00ff00 || + Mask1->getAPIntValue() != 0x00ff00ff) + return SDValue(); + SDValue Shift0 = N0.getOperand(0); + SDValue Shift1 = N1.getOperand(0); + if (Shift0.getOpcode() != ISD::SHL || Shift1.getOpcode() != ISD::SRL) + return SDValue(); + ConstantSDNode *ShiftAmt0 = isConstOrConstSplat(Shift0.getOperand(1)); + ConstantSDNode *ShiftAmt1 = isConstOrConstSplat(Shift1.getOperand(1)); + if (!ShiftAmt0 || !ShiftAmt1) + return SDValue(); + if (ShiftAmt0->getAPIntValue() != 8 || ShiftAmt1->getAPIntValue() != 8) + return SDValue(); + if (Shift0.getOperand(0) != Shift1.getOperand(0)) + return SDValue(); + + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, Shift0.getOperand(0)); + SDValue ShAmt = DAG.getConstant(16, DL, ShiftAmountTy); + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); +} + /// Match a 32-bit packed halfword bswap. That is /// ((x & 0x000000ff) << 8) | /// ((x & 0x0000ff00) >> 8) | @@ -5670,6 +5815,16 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT)) return SDValue(); + if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT, + getShiftAmountTy(VT))) + return BSwap; + + // Try again with commuted operands. 
+ if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT, + getShiftAmountTy(VT))) + return BSwap; + + // Look for either // (or (bswaphpair), (bswaphpair)) // (or (or (bswaphpair), (and)), (and)) @@ -5875,17 +6030,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or c1, c2) -> c1|c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, {N0, N1})) + return C; + // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); + // fold (or x, 0) -> x if (isNullConstant(N1)) return N0; + // fold (or x, -1) -> -1 if (isAllOnesConstant(N1)) return N1; @@ -5920,8 +6077,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { }; if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) { - if (SDValue COR = DAG.FoldConstantArithmetic( - ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) { + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, + {N1, N0.getOperand(1)})) { SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorklist(IOR.getNode()); return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR); @@ -6020,6 +6177,7 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1)); // (add v v) -> (shl v 1) + // TODO: Should this be a general DAG canonicalization? if (OppShift.getOpcode() == ISD::SRL && OppShiftCst && ExtractFrom.getOpcode() == ISD::ADD && ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) && @@ -6192,8 +6350,12 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // EltSize & Mask == NegC & Mask // // (because "x & Mask" is a truncation and distributes through subtraction). + // + // We also need to account for a potential truncation of NegOp1 if the amount + // has already been legalized to a shift amount type. APInt Width; - if (Pos == NegOp1) + if ((Pos == NegOp1) || + (NegOp1.getOpcode() == ISD::TRUNCATE && Pos == NegOp1.getOperand(0))) Width = NegC->getAPIntValue(); // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. @@ -6246,19 +6408,91 @@ SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, return SDValue(); } +// A subroutine of MatchRotate used once we have found an OR of two opposite +// shifts of N0 + N1. If Neg == <operand size> - Pos then the OR reduces +// to both (PosOpcode N0, N1, Pos) and (NegOpcode N0, N1, Neg), with the +// former being preferred if supported. InnerPos and InnerNeg are Pos and +// Neg with outer conversions stripped away. +// TODO: Merge with MatchRotatePosNeg. 
+SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, + SDValue Neg, SDValue InnerPos, + SDValue InnerNeg, unsigned PosOpcode, + unsigned NegOpcode, const SDLoc &DL) { + EVT VT = N0.getValueType(); + unsigned EltBits = VT.getScalarSizeInBits(); + + // fold (or (shl x0, (*ext y)), + // (srl x1, (*ext (sub 32, y)))) -> + // (fshl x0, x1, y) or (fshr x0, x1, (sub 32, y)) + // + // fold (or (shl x0, (*ext (sub 32, y))), + // (srl x1, (*ext y))) -> + // (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y)) + if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) { + bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); + return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1, + HasPos ? Pos : Neg); + } + + // Matching the shift+xor cases, we can't easily use the xor'd shift amount + // so for now just use the PosOpcode case if its legal. + // TODO: When can we use the NegOpcode case? + if (PosOpcode == ISD::FSHL && isPowerOf2_32(EltBits)) { + auto IsBinOpImm = [](SDValue Op, unsigned BinOpc, unsigned Imm) { + if (Op.getOpcode() != BinOpc) + return false; + ConstantSDNode *Cst = isConstOrConstSplat(Op.getOperand(1)); + return Cst && (Cst->getAPIntValue() == Imm); + }; + + // fold (or (shl x0, y), (srl (srl x1, 1), (xor y, 31))) + // -> (fshl x0, x1, y) + if (IsBinOpImm(N1, ISD::SRL, 1) && + IsBinOpImm(InnerNeg, ISD::XOR, EltBits - 1) && + InnerPos == InnerNeg.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHL, VT)) { + return DAG.getNode(ISD::FSHL, DL, VT, N0, N1.getOperand(0), Pos); + } + + // fold (or (shl (shl x0, 1), (xor y, 31)), (srl x1, y)) + // -> (fshr x0, x1, y) + if (IsBinOpImm(N0, ISD::SHL, 1) && + IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) && + InnerNeg == InnerPos.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) { + return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg); + } + + // fold (or (shl (add x0, x0), (xor y, 31)), (srl x1, y)) + // -> (fshr x0, x1, y) + // TODO: Should add(x,x) -> shl(x,1) be a general DAG canonicalization? + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N0.getOperand(1) && + IsBinOpImm(InnerPos, ISD::XOR, EltBits - 1) && + InnerNeg == InnerPos.getOperand(0) && + TLI.isOperationLegalOrCustom(ISD::FSHR, VT)) { + return DAG.getNode(ISD::FSHR, DL, VT, N0.getOperand(0), N1, Neg); + } + } + + return SDValue(); +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate -// a rot[lr]. +// a rot[lr]. This also matches funnel shift patterns, similar to rotation but +// with different shifted sources. SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return SDValue(); - // The target must have at least one rotate flavor. + // The target must have at least one rotate/funnel flavor. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); - if (!HasROTL && !HasROTR) + bool HasFSHL = hasOperation(ISD::FSHL, VT); + bool HasFSHR = hasOperation(ISD::FSHR, VT); + if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); // Check for truncated rotate. @@ -6308,12 +6542,13 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // At this point we've matched or extracted a shift op on each side. 
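For illustration only (not part of the patch), the funnel-shift semantics that MatchFunnelPosNeg targets, written for scalar i32 in plain C++:

#include <cstdint>

// fshl(a, b, c): shift the 64-bit concatenation a:b left by c and keep the
// high 32 bits; fshr keeps the low 32 bits of the corresponding right shift.
uint32_t fshl32(uint32_t a, uint32_t b, uint32_t c) {
  c &= 31;
  return c ? (a << c) | (b >> (32 - c)) : a;
}
uint32_t fshr32(uint32_t a, uint32_t b, uint32_t c) {
  c &= 31;
  return c ? (b >> c) | (a << (32 - c)) : b;
}
// So (or (shl x0, y), (srl x1, (sub 32, y))) is fshl(x0, x1, y), and the
// commuted form is fshr(x0, x1, y), which is what the matcher produces.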
- if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) - return SDValue(); // Not shifting the same value. - if (LHSShift.getOpcode() == RHSShift.getOpcode()) return SDValue(); // Shifts must disagree. + bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); + if (!IsRotate && !(HasFSHL || HasFSHR)) + return SDValue(); // Requires funnel shift support. + // Canonicalize shl to left side in a shl/srl pair. if (RHSShift.getOpcode() == ISD::SHL) { std::swap(LHS, RHS); @@ -6329,13 +6564,21 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + // fold (or (shl x, C1), (srl y, C2)) -> (fshl x, y, C1) + // fold (or (shl x, C1), (srl y, C2)) -> (fshr x, y, C2) + // iff C1+C2 == EltSizeInBits auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; }; if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { - SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); + SDValue Res; + if (IsRotate && (HasROTL || HasROTR)) + Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt); + else + Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, + RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt); // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { @@ -6353,10 +6596,10 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits)); } - Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); + Res = DAG.getNode(ISD::AND, DL, VT, Res, Mask); } - return Rot; + return Res; } // If there is a mask here, and we have a variable shift, we can't be sure @@ -6379,13 +6622,29 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { RExtOp0 = RHSShiftAmt.getOperand(0); } - SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, - LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (IsRotate && (HasROTL || HasROTR)) { + SDValue TryL = + MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, LExtOp0, + RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (TryL) + return TryL; + + SDValue TryR = + MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, RExtOp0, + LExtOp0, ISD::ROTR, ISD::ROTL, DL); + if (TryR) + return TryR; + } + + SDValue TryL = + MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, LHSShiftAmt, RHSShiftAmt, + LExtOp0, RExtOp0, ISD::FSHL, ISD::FSHR, DL); if (TryL) return TryL; - SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, - RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); + SDValue TryR = + MatchFunnelPosNeg(LHSShiftArg, RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + RExtOp0, LExtOp0, ISD::FSHR, ISD::FSHL, DL); if (TryR) return TryR; @@ -6610,9 +6869,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT)) return SDValue(); - // Check if all the bytes of the combined value we are looking at are stored - // to the same base address. Collect bytes offsets from Base address into - // ByteOffsets. + // Check if all the bytes of the combined value we are looking at are stored + // to the same base address. Collect bytes offsets from Base address into + // ByteOffsets. 
SDValue CombinedValue; SmallVector<int64_t, 8> ByteOffsets(Width, INT64_MAX); int64_t FirstOffset = INT64_MAX; @@ -6630,17 +6889,16 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { SDValue Value = Trunc.getOperand(0); if (Value.getOpcode() == ISD::SRL || Value.getOpcode() == ISD::SRA) { - ConstantSDNode *ShiftOffset = - dyn_cast<ConstantSDNode>(Value.getOperand(1)); - // Trying to match the following pattern. The shift offset must be + auto *ShiftOffset = dyn_cast<ConstantSDNode>(Value.getOperand(1)); + // Trying to match the following pattern. The shift offset must be // a constant and a multiple of 8. It is the byte offset in "y". - // + // // x = srl y, offset - // i8 z = trunc x + // i8 z = trunc x // store z, ... if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8)) return SDValue(); - + Offset = ShiftOffset->getSExtValue()/8; Value = Value.getOperand(0); } @@ -6685,7 +6943,7 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) { assert(FirstOffset != INT64_MAX && "First byte offset must be set"); assert(FirstStore && "First store must be set"); - // Check if the bytes of the combined value we are looking at match with + // Check if the bytes of the combined value we are looking at match with // either big or little endian value store. Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset); if (!IsBigEndian.hasValue()) @@ -7030,20 +7288,22 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDLoc DL(N); if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, DL, VT); + // fold (xor x, undef) -> undef if (N0.isUndef()) return N0; if (N1.isUndef()) return N1; + // fold (xor c1, c2) -> c1^c2 - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); - if (N0C && N1C) - return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, {N0, N1})) + return C; + // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) return DAG.getNode(ISD::XOR, DL, VT, N1, N0); + // fold (xor x, 0) -> x if (isNullConstant(N1)) return N0; @@ -7058,7 +7318,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold !(x cc y) -> (x !cc y) unsigned N0Opcode = N0.getOpcode(); SDValue LHS, RHS, CC; - if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { + if (TLI.isConstTrueVal(N1.getNode()) && + isSetCCEquivalent(N0, LHS, RHS, CC, /*MatchStrict*/true)) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType()); if (!LegalOperations || @@ -7071,6 +7332,21 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { case ISD::SELECT_CC: return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2), N0.getOperand(3), NotCC); + case ISD::STRICT_FSETCC: + case ISD::STRICT_FSETCCS: { + if (N0.hasOneUse()) { + // FIXME Can we handle multiple uses? Could we token factor the chain + // results from the new/old setcc? + SDValue SetCC = DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC, + N0.getOperand(0), + N0Opcode == ISD::STRICT_FSETCCS); + CombineTo(N, SetCC); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), SetCC.getValue(1)); + recursivelyDeleteUnusedNodes(N0.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + break; + } } } } @@ -7405,15 +7681,29 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { } // fold (rot x, c) -> (rot x, c % BitSize) - // TODO - support non-uniform vector amounts. 
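For illustration only (not part of the patch), the periodicity that the rotate-amount fold above relies on, for scalar i32:

#include <cstdint>

// Rotation repeats every 32 bits, so an out-of-range amount can be reduced:
// rotl(x, c) == rotl(x, c % 32).
uint32_t rotl32(uint32_t x, uint32_t c) {
  c %= 32;
  return c ? (x << c) | (x >> (32 - c)) : x;
}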
- if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) { - if (Cst->getAPIntValue().uge(Bitsize)) { - uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize); - return DAG.getNode(N->getOpcode(), dl, VT, N0, - DAG.getConstant(RotAmt, dl, N1.getValueType())); - } + bool OutOfRange = false; + auto MatchOutOfRange = [Bitsize, &OutOfRange](ConstantSDNode *C) { + OutOfRange |= C->getAPIntValue().uge(Bitsize); + return true; + }; + if (ISD::matchUnaryPredicate(N1, MatchOutOfRange) && OutOfRange) { + EVT AmtVT = N1.getValueType(); + SDValue Bits = DAG.getConstant(Bitsize, dl, AmtVT); + if (SDValue Amt = + DAG.FoldConstantArithmetic(ISD::UREM, dl, AmtVT, {N1, Bits})) + return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt); } + // rot i16 X, 8 --> bswap X + auto *RotAmtC = isConstOrConstSplat(N1); + if (RotAmtC && RotAmtC->getAPIntValue() == 8 && + VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT)) + return DAG.getNode(ISD::BSWAP, dl, VT, N0); + + // Simplify the operands using demanded-bits information. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). if (N1.getOpcode() == ISD::TRUNCATE && N1.getOperand(0).getOpcode() == ISD::AND) { @@ -7430,12 +7720,11 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { EVT ShiftVT = C1->getValueType(0); bool SameSide = (N->getOpcode() == NextOp); unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB; - if (SDValue CombinedShift = - DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) { + if (SDValue CombinedShift = DAG.FoldConstantArithmetic( + CombineOp, dl, ShiftVT, {N1, N0.getOperand(1)})) { SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT); SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( - ISD::SREM, dl, ShiftVT, CombinedShift.getNode(), - BitsizeC.getNode()); + ISD::SREM, dl, ShiftVT, {CombinedShift, BitsizeC}); return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), CombinedShiftNorm); } @@ -7471,8 +7760,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && TLI.getBooleanContents(N00.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) { - if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, - N01CV, N1CV)) + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N01, N1})) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); } } @@ -7482,10 +7771,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (shl c1, c2) -> c1<<c2 - // TODO - support non-uniform vector shift amounts. - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, {N0, N1})) + return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -7502,8 +7789,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } - // TODO - support non-uniform vector shift amounts. - if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) @@ -7691,9 +7977,90 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; + // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)). 
+ if (N0.getOpcode() == ISD::VSCALE) + if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) { + auto DL = SDLoc(N); + APInt C0 = N0.getConstantOperandAPInt(0); + APInt C1 = NC1->getAPIntValue(); + return DAG.getVScale(DL, VT, C0 << C1); + } + return SDValue(); } +// Transform a right shift of a multiply into a multiply-high. +// Examples: +// (srl (mul (zext i32:$a to i64), (zext i32:$a to i64)), 32) -> (mulhu $a, $b) +// (sra (mul (sext i32:$a to i64), (sext i32:$a to i64)), 32) -> (mulhs $a, $b) +static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, + const TargetLowering &TLI) { + assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && + "SRL or SRA node is required here!"); + + // Check the shift amount. Proceed with the transformation if the shift + // amount is constant. + ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1)); + if (!ShiftAmtSrc) + return SDValue(); + + SDLoc DL(N); + + // The operation feeding into the shift must be a multiply. + SDValue ShiftOperand = N->getOperand(0); + if (ShiftOperand.getOpcode() != ISD::MUL) + return SDValue(); + + // Both operands must be equivalent extend nodes. + SDValue LeftOp = ShiftOperand.getOperand(0); + SDValue RightOp = ShiftOperand.getOperand(1); + bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; + bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; + + if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode()) + return SDValue(); + + EVT WideVT1 = LeftOp.getValueType(); + EVT WideVT2 = RightOp.getValueType(); + (void)WideVT2; + // Proceed with the transformation if the wide types match. + assert((WideVT1 == WideVT2) && + "Cannot have a multiply node with two different operand types."); + + EVT NarrowVT = LeftOp.getOperand(0).getValueType(); + // Check that the two extend nodes are the same type. + if (NarrowVT != RightOp.getOperand(0).getValueType()) + return SDValue(); + + // Only transform into mulh if mulh for the narrow type is cheaper than + // a multiply followed by a shift. This should also check if mulh is + // legal for NarrowVT on the target. + if (!TLI.isMulhCheaperThanMulShift(NarrowVT)) + return SDValue(); + + // Proceed with the transformation if the wide type is twice as large + // as the narrow type. + unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize) + return SDValue(); + + // Check the shift amount with the narrow type size. + // Proceed with the transformation if the shift amount is the width + // of the narrow type. + unsigned ShiftAmt = ShiftAmtSrc->getZExtValue(); + if (ShiftAmt != NarrowVTSize) + return SDValue(); + + // If the operation feeding into the MUL is a sign extend (sext), + // we use mulhs. Othewise, zero extends (zext) use mulhu. + unsigned MulhOpcode = IsSignExt ? ISD::MULHS : ISD::MULHU; + + SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), + RightOp.getOperand(0)); + return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1) + : DAG.getZExtOrTrunc(Result, DL, WideVT1)); +} + SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -7717,10 +8084,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (sra c1, c2) -> (sra c1, c2) - // TODO - support non-uniform vector shift amounts. 
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, {N0, N1})) + return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -7811,7 +8176,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper. // sra (add (shl X, N1C), AddC), N1C --> // sext (add (trunc X to (width - N1C)), AddC') - if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && + if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C && N0.getOperand(0).getOpcode() == ISD::SHL && N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) { if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) { @@ -7828,7 +8193,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // implementation and/or target-specific overrides (because // non-simple types likely require masking when legalized), but that // restriction may conflict with other transforms. - if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) { + if (TruncVT.isSimple() && isTypeLegal(TruncVT) && + TLI.isTruncateFree(VT, TruncVT)) { SDLoc DL(N); SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT); SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt). @@ -7871,8 +8237,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { } // Simplify, based on bits shifted out of the LHS. - // TODO - support non-uniform vector shift amounts. - if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); // If the sign bit is known to be zero, switch this to a SRL. @@ -7883,6 +8248,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (SDValue NewSRA = visitShiftByConstant(N)) return NewSRA; + // Try to transform this shift into a multiply-high if + // it matches the appropriate pattern detected in combineShiftToMULH. + if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) + return MULH; + return SDValue(); } @@ -7903,10 +8273,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { ConstantSDNode *N1C = isConstOrConstSplat(N1); // fold (srl c1, c2) -> c1 >>u c2 - // TODO - support non-uniform vector shift amounts. - ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); - if (N0C && N1C && !N1C->isOpaque()) - return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, {N0, N1})) + return C; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -8070,8 +8438,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold operands of srl based on knowledge that the low bits are not // demanded. - // TODO - support non-uniform vector shift amounts. - if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); if (N1C && !N1C->isOpaque()) @@ -8111,6 +8478,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } + // Try to transform this shift into a multiply-high if + // it matches the appropriate pattern detected in combineShiftToMULH. + if (SDValue MULH = combineShiftToMULH(N, DAG, TLI)) + return MULH; + return SDValue(); } @@ -8160,6 +8532,45 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, SDLoc(N), ShAmtTy)); + + // fold (fshl ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive. 
+ // fold (fshr ld1, ld0, c) -> (ld0[ofs]) iff ld0 and ld1 are consecutive. + // TODO - bigendian support once we have test coverage. + // TODO - can we merge this with CombineConseutiveLoads/MatchLoadCombine? + // TODO - permit LHS EXTLOAD if extensions are shifted out. + if ((BitWidth % 8) == 0 && (ShAmt % 8) == 0 && !VT.isVector() && + !DAG.getDataLayout().isBigEndian()) { + auto *LHS = dyn_cast<LoadSDNode>(N0); + auto *RHS = dyn_cast<LoadSDNode>(N1); + if (LHS && RHS && LHS->isSimple() && RHS->isSimple() && + LHS->getAddressSpace() == RHS->getAddressSpace() && + (LHS->hasOneUse() || RHS->hasOneUse()) && ISD::isNON_EXTLoad(RHS) && + ISD::isNON_EXTLoad(LHS)) { + if (DAG.areNonVolatileConsecutiveLoads(LHS, RHS, BitWidth / 8, 1)) { + SDLoc DL(RHS); + uint64_t PtrOff = + IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8); + Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff); + bool Fast = false; + if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + RHS->getAddressSpace(), NewAlign, + RHS->getMemOperand()->getFlags(), &Fast) && + Fast) { + SDValue NewPtr = + DAG.getMemBasePlusOffset(RHS->getBasePtr(), PtrOff, DL); + AddToWorklist(NewPtr.getNode()); + SDValue Load = DAG.getLoad( + VT, DL, RHS->getChain(), NewPtr, + RHS->getPointerInfo().getWithOffset(PtrOff), NewAlign, + RHS->getMemOperand()->getFlags(), RHS->getAAInfo()); + // Replace the old load's chain with the new load's chain. + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1.getValue(1), Load.getValue(1)); + return Load; + } + } + } + } } // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2) @@ -8609,7 +9020,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // Create the actual or node if we can generate good code for it. if (!normalizeToSequence) { SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0); - return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, + return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2, Flags); } // Otherwise see if we can optimize to a better pattern. 
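Editorial aside, not part of the patch: a little-endian sketch of why the consecutive-load funnel-shift fold above is sound (helper names are illustrative). With two adjacent 32-bit loads, lo at p and hi at p + 4, a byte-aligned FSHL of the pair is just a misaligned 32-bit load at p + (32 - ShAmt)/8, matching the PtrOff computation in the hunk.

    #include <cstdint>
    #include <cstring>

    // ISD::FSHL semantics for 0 < amt < 32.
    uint32_t fshl32(uint32_t hi, uint32_t lo, unsigned amt) {
      return (hi << amt) | (lo >> (32 - amt));
    }

    // Unaligned little-endian 32-bit load at p + off.
    uint32_t load32(const unsigned char *p, unsigned off) {
      uint32_t v;
      std::memcpy(&v, p + off, sizeof(v));
      return v;
    }

    // On a little-endian host, for amt in {8, 16, 24}:
    //   fshl32(load32(p, 4), load32(p, 0), amt) == load32(p, (32 - amt) / 8)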
@@ -8825,6 +9236,8 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { SDValue N2Elt = N2.getOperand(i); if (N1Elt.isUndef() || N2Elt.isUndef()) continue; + if (N1Elt.getValueType() != N2Elt.getValueType()) + continue; const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue(); const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue(); @@ -9395,8 +9808,7 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) { SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad, N1.getOperand(1)); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.zext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL0(N0); SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift, DAG.getConstant(Mask, DL0, VT)); @@ -9702,8 +10114,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { LN00->getChain(), LN00->getBasePtr(), LN00->getMemoryVT(), LN00->getMemOperand()); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.sext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).sext(VT.getSizeInBits()); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND); @@ -9941,7 +10352,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { SDValue Op = N0.getOperand(0); - Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT); AddToWorklist(Op.getNode()); SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT); // Transfer the debug info; the new node is equivalent to N0. @@ -9953,7 +10364,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT); AddToWorklist(Op.getNode()); - SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT); // We may safely transfer the debug info describing the truncate node over // to the equivalent and operation. DAG.transferDbgValues(N0, And); @@ -9971,8 +10382,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.zext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL(N); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); @@ -10026,8 +10436,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { LN00->getChain(), LN00->getBasePtr(), LN00->getMemoryVT(), LN00->getMemOperand()); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.zext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); SDLoc DL(N); SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, ExtLoad, DAG.getConstant(Mask, DL, VT)); @@ -10080,23 +10489,22 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // that the element size of the sext'd result matches the element size of // the compare operands. 
SDLoc DL(N); - SDValue VecOnes = DAG.getConstant(1, DL, VT); if (VT.getSizeInBits() == N00VT.getSizeInBits()) { - // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + // zext(setcc) -> zext_in_reg(vsetcc) for vectors. SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); - return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes); + return DAG.getZeroExtendInReg(VSetCC, DL, N0.getValueType()); } // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then - // truncate/sign extend. + // truncate/any extend followed by zext_in_reg. EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger(); SDValue VsetCC = DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0), N0.getOperand(1), N0.getOperand(2)); - return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT), - VecOnes); + return DAG.getZeroExtendInReg(DAG.getAnyExtOrTrunc(VsetCC, DL, VT), DL, + N0.getValueType()); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc @@ -10127,7 +10535,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDLoc DL(N); // Ensure that the shift amount is wide enough for the shifted value. - if (VT.getSizeInBits() >= 256) + if (Log2_32_Ceil(VT.getSizeInBits()) > ShAmt.getValueSizeInBits()) ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); return DAG.getNode(N0.getOpcode(), DL, VT, @@ -10187,8 +10595,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDLoc DL(N); SDValue X = N0.getOperand(0).getOperand(0); X = DAG.getAnyExtOrTrunc(X, DL, VT); - APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - Mask = Mask.zext(VT.getSizeInBits()); + APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); return DAG.getNode(ISD::AND, DL, VT, X, DAG.getConstant(Mask, DL, VT)); } @@ -10348,6 +10755,45 @@ SDValue DAGCombiner::visitAssertExt(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitAssertAlign(SDNode *N) { + SDLoc DL(N); + + Align AL = cast<AssertAlignSDNode>(N)->getAlign(); + SDValue N0 = N->getOperand(0); + + // Fold (assertalign (assertalign x, AL0), AL1) -> + // (assertalign x, max(AL0, AL1)) + if (auto *AAN = dyn_cast<AssertAlignSDNode>(N0)) + return DAG.getAssertAlign(DL, N0.getOperand(0), + std::max(AL, AAN->getAlign())); + + // In rare cases, there are trivial arithmetic ops in source operands. Sink + // this assert down to source operands so that those arithmetic ops could be + // exposed to the DAG combining. 
+ switch (N0.getOpcode()) { + default: + break; + case ISD::ADD: + case ISD::SUB: { + unsigned AlignShift = Log2(AL); + SDValue LHS = N0.getOperand(0); + SDValue RHS = N0.getOperand(1); + unsigned LHSAlignShift = DAG.computeKnownBits(LHS).countMinTrailingZeros(); + unsigned RHSAlignShift = DAG.computeKnownBits(RHS).countMinTrailingZeros(); + if (LHSAlignShift >= AlignShift || RHSAlignShift >= AlignShift) { + if (LHSAlignShift < AlignShift) + LHS = DAG.getAssertAlign(DL, LHS, AL); + if (RHSAlignShift < AlignShift) + RHS = DAG.getAssertAlign(DL, RHS, AL); + return DAG.getNode(N0.getOpcode(), DL, N0.getValueType(), LHS, RHS); + } + break; + } + } + + return SDValue(); +} + /// If the result of a wider load is shifted to right of N bits and then /// truncated to a narrower type and where N is a multiple of number of bits of /// the narrower type, transform it to a narrower load from address + N / num of @@ -10428,9 +10874,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { } // At this point, we must have a load or else we can't do the transform. - if (!isa<LoadSDNode>(N0)) return SDValue(); - - auto *LN0 = cast<LoadSDNode>(N0); + auto *LN0 = dyn_cast<LoadSDNode>(N0); + if (!LN0) return SDValue(); // Because a SRL must be assumed to *need* to zero-extend the high bits // (as opposed to anyext the high bits), we can't combine the zextload @@ -10449,8 +10894,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDNode *Mask = *(SRL->use_begin()); if (Mask->getOpcode() == ISD::AND && isa<ConstantSDNode>(Mask->getOperand(1))) { - const APInt &ShiftMask = - cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue(); + const APInt& ShiftMask = Mask->getConstantOperandAPInt(1); if (ShiftMask.isMask()) { EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(), ShiftMask.countTrailingOnes()); @@ -10480,7 +10924,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); // Reducing the width of a volatile load is illegal. For atomics, we may be - // able to reduce the width provided we never widen again. (see D66309) + // able to reduce the width provided we never widen again. (see D66309) if (!LN0->isSimple() || !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); @@ -10561,26 +11005,27 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); - EVT EVT = cast<VTSDNode>(N1)->getVT(); + EVT ExtVT = cast<VTSDNode>(N1)->getVT(); unsigned VTBits = VT.getScalarSizeInBits(); - unsigned EVTBits = EVT.getScalarSizeInBits(); + unsigned ExtVTBits = ExtVT.getScalarSizeInBits(); + // sext_vector_inreg(undef) = 0 because the top bit will all be the same. if (N0.isUndef()) - return DAG.getUNDEF(VT); + return DAG.getConstant(0, SDLoc(N), VT); // fold (sext_in_reg c1) -> c1 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. 
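Editorial aside, not part of the patch: the arithmetic fact the ADD/SUB sinking above relies on, as a tiny sketch. If a sum or difference is asserted to be AL-aligned and one operand is already known to have at least log2(AL) trailing zero bits, the other operand must be AL-aligned as well, so the assertion can be pushed down onto it.

    #include <cassert>
    #include <cstdint>

    void propagate_alignment(uint64_t lhs, uint64_t rhs, uint64_t align /* power of two */) {
      if ((lhs + rhs) % align == 0 && lhs % align == 0)
        assert(rhs % align == 0 && "remaining operand must carry the alignment");
    }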
- if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) + if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, - N0.getOperand(0), N1); + ExtVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), + N1); // fold (sext_in_reg (sext x)) -> (sext x) // fold (sext_in_reg (aext x)) -> (sext x) @@ -10589,8 +11034,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); - if ((N00Bits <= EVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) && + if ((N00Bits <= ExtVTBits || + (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } @@ -10599,7 +11044,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG || N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) && - N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) { + N0.getOperand(0).getScalarValueSizeInBits() == ExtVTBits) { if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT)) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, @@ -10610,14 +11055,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // iff we are extending the source sign bit. if (N0.getOpcode() == ISD::ZERO_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getScalarValueSizeInBits() == EVTBits && + if (N00.getScalarValueSizeInBits() == ExtVTBits && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); } // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. - if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1))) - return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType()); + if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, ExtVTBits - 1))) + return DAG.getZeroExtendInReg(N0, SDLoc(N), ExtVT); // fold operands of sext_in_reg based on knowledge that the top bits are not // demanded. @@ -10634,11 +11079,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. if (N0.getOpcode() == ISD::SRL) { if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) - if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) { + if (ShAmt->getAPIntValue().ule(VTBits - ExtVTBits)) { // We can turn this into an SRA iff the input to the SRL is already sign // extended enough. unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); - if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits) + if (((VTBits - ExtVTBits) - ShAmt->getZExtValue()) < InSignBits) return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1)); } @@ -10650,14 +11095,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // extends that the target does support. 
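Editorial aside, not part of the patch: the scalar meaning of the sign_extend_inreg folds above, spelled out for an i32 value extended in-register from i8 (VTBits = 32, ExtVTBits = 8); the helper name is illustrative.

    #include <cstdint>

    // Keep the low 8 bits and replicate bit 7 into bits 8..31.
    int32_t sext_in_reg_i8(int32_t x) {
      return (x & 0x80) ? static_cast<int32_t>(x | ~0xff) : (x & 0xff);
    }

    // * If x already has at least 32 - 8 + 1 = 25 sign bits, bits 7..31 agree
    //   and the operation is the identity, so it can simply be dropped.
    // * If bit 7 is known to be zero, the result is just (x & 0xff), i.e. the
    //   zero-extend-in-register form the fold switches to.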
if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && - EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && N0.hasOneUse()) || - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), EVT, + LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -10667,13 +11112,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && - EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ExtVT == cast<LoadSDNode>(N0)->getMemoryVT() && ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, ExtVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), EVT, + LN0->getBasePtr(), ExtVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); @@ -10681,11 +11126,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { } // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) - if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { + if (ExtVTBits <= 16 && N0.getOpcode() == ISD::OR) { if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false)) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, - BSwap, N1); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } return SDValue(); @@ -10695,8 +11139,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // sext_vector_inreg(undef) = 0 because the top bit will all be the same. if (N0.isUndef()) - return DAG.getUNDEF(VT); + return DAG.getConstant(0, SDLoc(N), VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -10711,8 +11156,9 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + // zext_vector_inreg(undef) = 0 because the top bits will be zero. if (N0.isUndef()) - return DAG.getUNDEF(VT); + return DAG.getConstant(0, SDLoc(N), VT); if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -10788,13 +11234,12 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); SDLoc DL(N); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy, DAG.getBitcast(NVT, N0.getOperand(0)), - DAG.getConstant(Index, DL, IndexTy)); + DAG.getVectorIdxConstant(Index, DL)); } } @@ -10832,7 +11277,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // Attempt to pre-truncate BUILD_VECTOR sources. 
if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations && - TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) { + TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType()) && + // Avoid creating illegal types if running after type legalizer. + (!LegalTypes || TLI.isTypeLegal(VT.getScalarType()))) { SDLoc DL(N); EVT SVT = VT.getScalarType(); SmallVector<SDValue, 8> TruncOps; @@ -10961,10 +11408,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecSrcVT))) { SDLoc SL(N); - EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); unsigned Idx = isLE ? 0 : VecSrcVT.getVectorNumElements() - 1; return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT, VecSrc, - DAG.getConstant(Idx, SL, IdxVT)); + DAG.getVectorIdxConstant(Idx, SL)); } } @@ -11064,14 +11510,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { unsigned LD1Bytes = LD1VT.getStoreSize(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { - unsigned Align = LD1->getAlignment(); - unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + Align Alignment = LD1->getAlign(); + Align NewAlign = DAG.getDataLayout().getABITypeAlign( VT.getTypeForEVT(*DAG.getContext())); - if (NewAlign <= Align && + if (NewAlign <= Alignment && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), - LD1->getPointerInfo(), Align); + LD1->getPointerInfo(), Alignment); } return SDValue(); @@ -11389,6 +11835,20 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { return CombineConsecutiveLoads(N, VT); } +SDValue DAGCombiner::visitFREEZE(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // (freeze (freeze x)) -> (freeze x) + if (N0.getOpcode() == ISD::FREEZE) + return N0; + + // If the input is a constant, return it. + if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) + return N0; + + return SDValue(); +} + /// We know that BV is a build_vector node with Constant, ConstantFP or Undef /// operands. DstEltVT indicates the destination element value type. SDValue DAGCombiner:: @@ -11519,7 +11979,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = @@ -11532,13 +11992,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDNodeFlags Flags = N->getFlags(); bool CanFuse = Options.UnsafeFPMath || isContractable(N); + bool CanReassociate = + Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || CanFuse || HasFMAD); // If the addition is not contractable, do not combine. if (!AllowFusionGlobally && !isContractable(N)) return SDValue(); - const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); if (STI && STI->generateFMAsInMachineCombiner(OptLevel)) return SDValue(); @@ -11573,6 +12034,30 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N0, Flags); } + // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) + // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) + // This requires reassociation because it changes the order of operations. 
+ SDValue FMA, E; + if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && + N0.getOperand(2).hasOneUse()) { + FMA = N0; + E = N1; + } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && + N1.getOperand(2).hasOneUse()) { + FMA = N1; + E = N0; + } + if (FMA && E) { + SDValue A = FMA.getOperand(0); + SDValue B = FMA.getOperand(1); + SDValue C = FMA.getOperand(2).getOperand(0); + SDValue D = FMA.getOperand(2).getOperand(1); + SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E, Flags); + return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE, Flags); + } + // Look through FP_EXTEND nodes to do more combining. // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) @@ -11606,33 +12091,6 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // More folding opportunities when target permits. if (Aggressive) { - // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) - if (CanFuse && - N0.getOpcode() == PreferredFusedOpcode && - N0.getOperand(2).getOpcode() == ISD::FMUL && - N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(2).getOperand(0), - N0.getOperand(2).getOperand(1), - N1, Flags), Flags); - } - - // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) - if (CanFuse && - N1->getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FMUL && - N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N1.getOperand(0), N1.getOperand(1), - DAG.getNode(PreferredFusedOpcode, SL, VT, - N1.getOperand(2).getOperand(0), - N1.getOperand(2).getOperand(1), - N0, Flags), Flags); - } - - // fold (fadd (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y, (fma (fpext u), (fpext v), z)) auto FoldFAddFMAFPExtFMul = [&] ( @@ -11736,7 +12194,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = @@ -11756,13 +12214,13 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { if (!AllowFusionGlobally && !isContractable(N)) return SDValue(); - const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo(); if (STI && STI->generateFMAsInMachineCombiner(OptLevel)) return SDValue(); // Always prefer FMAD to FMA for precision. unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool NoSignedZero = Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros(); // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. 
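Editorial aside, not part of the patch: a worked example of why the fold above is gated on reassociation. Rewriting (A*B + C*D) + E into A*B + (C*D + E) changes the rounding order, so the two forms can differ under round-to-nearest.

    #include <cstdio>

    int main() {
      double A = 1.0, B = 1.0, C = 1.0, D = 1e16, E = -1e16;
      double before = (A * B + C * D) + E;  // 1 + 1e16 rounds to 1e16, so the sum is 0.0
      double after  = A * B + (C * D + E);  // 1e16 + -1e16 == 0.0, so the sum is 1.0
      std::printf("%g vs %g\n", before, after);
    }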
@@ -11773,19 +12231,43 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { }; // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - N0.getOperand(0), N0.getOperand(1), - DAG.getNode(ISD::FNEG, SL, VT, N1), Flags); - } + auto tryToFoldXYSubZ = [&](SDValue XY, SDValue Z) { + if (isContractableFMUL(XY) && (Aggressive || XY->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, XY.getOperand(0), + XY.getOperand(1), DAG.getNode(ISD::FNEG, SL, VT, Z), + Flags); + } + return SDValue(); + }; // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. - if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) { - return DAG.getNode(PreferredFusedOpcode, SL, VT, - DAG.getNode(ISD::FNEG, SL, VT, - N1.getOperand(0)), - N1.getOperand(1), N0, Flags); + auto tryToFoldXSubYZ = [&](SDValue X, SDValue YZ) { + if (isContractableFMUL(YZ) && (Aggressive || YZ->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, YZ.getOperand(0)), + YZ.getOperand(1), X, Flags); + } + return SDValue(); + }; + + // If we have two choices trying to fold (fsub (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. + if (isContractableFMUL(N0) && isContractableFMUL(N1) && + (N0.getNode()->use_size() > N1.getNode()->use_size())) { + // fold (fsub (fmul a, b), (fmul c, d)) -> (fma (fneg c), d, (fmul a, b)) + if (SDValue V = tryToFoldXSubYZ(N0, N1)) + return V; + // fold (fsub (fmul a, b), (fmul c, d)) -> (fma a, b, (fneg (fmul c, d))) + if (SDValue V = tryToFoldXYSubZ(N0, N1)) + return V; + } else { + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (SDValue V = tryToFoldXYSubZ(N0, N1)) + return V; + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + if (SDValue V = tryToFoldXSubYZ(N0, N1)) + return V; } // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) @@ -11902,7 +12384,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // -> (fma (fneg y), z, (fma (fneg u), v, x)) if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && isContractableFMUL(N1.getOperand(2)) && - N1->hasOneUse()) { + N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); SDValue N21 = N1.getOperand(2).getOperand(1); return DAG.getNode(PreferredFusedOpcode, SL, VT, @@ -12055,7 +12537,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // Floating-point multiply-add with intermediate rounding. This can result // in a less precise result due to the changed rounding order. bool HasFMAD = Options.UnsafeFPMath && - (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + (LegalOperations && TLI.isFMADLegal(DAG, N)); // No valid opcode, do not combine. 
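Editorial aside, not part of the patch: the two contractions the refactored fsub combine above chooses between, written as plain C++ (function names are illustrative). When both operands are fmuls, the patch folds the multiply with fewer other uses so that the shared multiply can stay live.

    #include <cmath>

    // (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    float fold_lhs_mul(float x, float y, float z) { return std::fma(x, y, -z); }

    // (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    float fold_rhs_mul(float x, float y, float z) { return std::fma(-y, z, x); }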
if (!HasFMAD && !HasFMA) @@ -12132,6 +12614,9 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -12155,18 +12640,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { return NewSel; // fold (fadd A, (fneg B)) -> (fsub A, B) - if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) - return DAG.getNode( - ISD::FSUB, DL, VT, N0, - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) + if (SDValue NegN1 = TLI.getCheaperNegatedExpression( + N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FSUB, DL, VT, N0, NegN1, Flags); // fold (fadd (fneg A), B) -> (fsub B, A) - if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) - return DAG.getNode( - ISD::FSUB, DL, VT, N1, - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); + if (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) + if (SDValue NegN0 = TLI.getCheaperNegatedExpression( + N0, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FSUB, DL, VT, N1, NegN0, Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12311,6 +12794,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -12345,8 +12831,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) - return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return NegN1; if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } @@ -12364,10 +12851,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) - return DAG.getNode( - ISD::FADD, DL, VT, N0, - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FADD, DL, VT, N0, NegN1, Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12378,21 +12864,6 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } -/// Return true if both inputs are at least as cheap in negated form and at -/// least one input is strictly cheaper in negated form. -bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { - if (char LHSNeg = - TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) - if (char RHSNeg = - TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) - // Both negated operands are at least as cheap as their counterparts. 
- // Check to see if at least one is cheaper negated. - if (LHSNeg == 2 || RHSNeg == 2) - return true; - - return false; -} - SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12403,6 +12874,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; const SDNodeFlags Flags = N->getFlags(); + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; + // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. @@ -12464,13 +12938,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FNEG, DL, VT, N0); // -N0 * -N1 --> N0 * N1 - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue NegN0 = - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); - } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) @@ -12549,13 +13028,18 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (-N0 * -N1) + N2 --> (N0 * N1) + N2 - if (isCheaperToUseNegatedFPOps(N0, N1)) { - SDValue NegN0 = - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); - } if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -12641,13 +13125,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // fold ((fma (fneg X), Y, (fneg Z)) -> fneg (fma X, Y, Z)) // fold ((fma X, (fneg Y), (fneg Z)) -> fneg (fma X, Y, Z)) - if (!TLI.isFNegFree(VT) && - TLI.isNegatibleForFree(SDValue(N, 0), DAG, LegalOperations, - ForCodeSize) == 2) - return DAG.getNode(ISD::FNEG, DL, VT, - TLI.getNegatedExpression(SDValue(N, 0), DAG, - LegalOperations, ForCodeSize), - Flags); + if (!TLI.isFNegFree(VT)) + if (SDValue Neg = TLI.getCheaperNegatedExpression( + SDValue(N, 0), DAG, LegalOperations, ForCodeSize)) + return DAG.getNode(ISD::FNEG, DL, VT, Neg, Flags); return SDValue(); } @@ -12664,7 +13145,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { // that only minsize should restrict this. 
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; const SDNodeFlags Flags = N->getFlags(); - if (!UnsafeMath && !Flags.hasAllowReciprocal()) + if (LegalDAG || (!UnsafeMath && !Flags.hasAllowReciprocal())) return SDValue(); // Skip if current node is a reciprocal/fneg-reciprocal. @@ -12735,6 +13216,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { const TargetOptions &Options = DAG.getTarget().Options; SDNodeFlags Flags = N->getFlags(); + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; + // fold vector ops if (VT.isVector()) if (SDValue FoldedVOp = SimplifyVBinOp(N)) @@ -12794,37 +13278,62 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } else if (N1.getOpcode() == ISD::FMUL) { // Look through an FMUL. Even though this won't remove the FDIV directly, // it's still worthwhile to get rid of the FSQRT if possible. - SDValue SqrtOp; - SDValue OtherOp; + SDValue Sqrt, Y; if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { - SqrtOp = N1.getOperand(0); - OtherOp = N1.getOperand(1); + Sqrt = N1.getOperand(0); + Y = N1.getOperand(1); } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { - SqrtOp = N1.getOperand(1); - OtherOp = N1.getOperand(0); + Sqrt = N1.getOperand(1); + Y = N1.getOperand(0); } - if (SqrtOp.getNode()) { + if (Sqrt.getNode()) { + // If the other multiply operand is known positive, pull it into the + // sqrt. That will eliminate the division if we convert to an estimate: + // X / (fabs(A) * sqrt(Z)) --> X / sqrt(A*A*Z) --> X * rsqrt(A*A*Z) + // TODO: Also fold the case where A == Z (fabs is missing). + if (Flags.hasAllowReassociation() && N1.hasOneUse() && + N1->getFlags().hasAllowReassociation() && Sqrt.hasOneUse() && + Y.getOpcode() == ISD::FABS && Y.hasOneUse()) { + SDValue AA = DAG.getNode(ISD::FMUL, DL, VT, Y.getOperand(0), + Y.getOperand(0), Flags); + SDValue AAZ = + DAG.getNode(ISD::FMUL, DL, VT, AA, Sqrt.getOperand(0), Flags); + if (SDValue Rsqrt = buildRsqrtEstimate(AAZ, Flags)) + return DAG.getNode(ISD::FMUL, DL, VT, N0, Rsqrt, Flags); + + // Estimate creation failed. Clean up speculatively created nodes. + recursivelyDeleteUnusedNodes(AAZ.getNode()); + } + // We found a FSQRT, so try to make this fold: - // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) - if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { - RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); - AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + // X / (Y * sqrt(Z)) -> X * (rsqrt(Z) / Y) + if (SDValue Rsqrt = buildRsqrtEstimate(Sqrt.getOperand(0), Flags)) { + SDValue Div = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, Rsqrt, Y, Flags); + AddToWorklist(Div.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, Div, Flags); } } } // Fold into a reciprocal estimate and multiply instead of a real divide. 
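Editorial aside, not part of the patch: the algebraic identity behind the fabs fold above, as a sketch (names are illustrative). For finite a and z >= 0, |a| * sqrt(z) equals sqrt(a*a*z), so the whole denominator can be fed through a single reciprocal-square-root estimate; because the regrouping changes rounding, the patch guards it with the reassociation flags.

    #include <cmath>

    double denom_direct(double a, double z) { return std::fabs(a) * std::sqrt(z); }
    double denom_fused(double a, double z)  { return std::sqrt(a * a * z); }

    // x / denom_direct(a, z)  ~=  x * (1.0 / denom_fused(a, z))  up to rounding.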
- if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) - return RV; + if (Options.NoInfsFPMath || Flags.hasNoInfs()) + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (isCheaperToUseNegatedFPOps(N0, N1)) - return DAG.getNode( - ISD::FDIV, SDLoc(N), VT, - TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); + TargetLowering::NegatibleCost CostN0 = + TargetLowering::NegatibleCost::Expensive; + TargetLowering::NegatibleCost CostN1 = + TargetLowering::NegatibleCost::Expensive; + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN0 && NegN1 && + (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1, Flags); return SDValue(); } @@ -12835,6 +13344,10 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); EVT VT = N->getValueType(0); + SDNodeFlags Flags = N->getFlags(); + + if (SDValue R = DAG.simplifyFPBinop(N->getOpcode(), N0, N1, Flags)) + return R; // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) @@ -12848,8 +13361,12 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { SDValue DAGCombiner::visitFSQRT(SDNode *N) { SDNodeFlags Flags = N->getFlags(); - if (!DAG.getTarget().Options.UnsafeFPMath && - !Flags.hasApproximateFuncs()) + const TargetOptions &Options = DAG.getTarget().Options; + + // Require 'ninf' flag since sqrt(+Inf) = +Inf, but the estimation goes as: + // sqrt(+Inf) == rsqrt(+Inf) * +Inf = 0 * +Inf = NaN + if ((!Options.UnsafeFPMath && !Flags.hasApproximateFuncs()) || + (!Options.NoInfsFPMath && !Flags.hasNoInfs())) return SDValue(); SDValue N0 = N->getOperand(0); @@ -13061,33 +13578,24 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { } // The next optimizations are desirable only if SELECT_CC can be lowered. 
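Editorial aside, not part of the patch: the corner case behind the new 'ninf' requirement on the FSQRT estimate above, as a sketch. Expanding sqrt(x) as x * rsqrt(x) is fine for finite positive x, but for x == +inf it produces inf * 0 == NaN instead of +inf.

    #include <cmath>
    #include <limits>

    double sqrt_via_rsqrt(double x) {
      double r = 1.0 / std::sqrt(x);  // stand-in for the rsqrt estimate
      return x * r;                   // +inf * 0.0 == NaN
    }

    // sqrt_via_rsqrt(std::numeric_limits<double>::infinity()) is NaN,
    // while std::sqrt(+inf) is +inf.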
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { - // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) - if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && - !VT.isVector() && - (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { - SDLoc DL(N); - SDValue Ops[] = - { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), - N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); - } + // fold (sint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), -1.0, 0.0) + if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && + !VT.isVector() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { + SDLoc DL(N); + return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(-1.0, DL, VT), + DAG.getConstantFP(0.0, DL, VT)); + } - // fold (sint_to_fp (zext (setcc x, y, cc))) -> - // (select_cc x, y, 1.0, 0.0,, cc) - if (N0.getOpcode() == ISD::ZERO_EXTEND && - N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && - (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { - SDLoc DL(N); - SDValue Ops[] = - { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), - DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), - N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); - } + // fold (sint_to_fp (zext (setcc x, y, cc))) -> + // (select (setcc x, y, cc), 1.0, 0.0) + if (N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { + SDLoc DL(N); + return DAG.getSelect(DL, VT, N0.getOperand(0), + DAG.getConstantFP(1.0, DL, VT), + DAG.getConstantFP(0.0, DL, VT)); } if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) @@ -13121,19 +13629,12 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } - // The next optimizations are desirable only if SELECT_CC can be lowered. 
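Editorial aside, not part of the patch: the scalar semantics the rewritten sint_to_fp folds above rely on (helper names are illustrative). A true i1 compare result sign-extends to -1, so converting it signed to FP yields -1.0 or 0.0, while the zero-extended form yields 1.0 or 0.0.

    #include <cstdint>

    double sint_to_fp_setcc(int32_t x, int32_t y) {
      int8_t cc = -static_cast<int8_t>(x < y);  // i1 true sign-extended == -1
      return static_cast<double>(cc);           // (x < y) ? -1.0 : 0.0
    }

    double sint_to_fp_zext_setcc(int32_t x, int32_t y) {
      uint8_t cc = (x < y);                     // zext(i1) == 0 or 1
      return static_cast<double>(cc);           // (x < y) ? 1.0 : 0.0
    }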
- if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { - // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) - if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && - (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { - SDLoc DL(N); - SDValue Ops[] = - { N0.getOperand(0), N0.getOperand(1), - DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), - N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); - } + // fold (uint_to_fp (setcc x, y, cc)) -> (select (setcc x, y, cc), 1.0, 0.0) + if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) { + SDLoc DL(N); + return DAG.getSelect(DL, VT, N0, DAG.getConstantFP(1.0, DL, VT), + DAG.getConstantFP(0.0, DL, VT)); } if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI)) @@ -13378,12 +13879,14 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) - return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + if (SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize)) + return NegN0; - // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 FIXME: This is - // duplicated in isNegatibleForFree, but isNegatibleForFree doesn't know it - // was called from a context with a nsz flag if the input fsub does not. + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't + // know it was called from a context with a nsz flag if the input fsub does + // not. if (N0.getOpcode() == ISD::FSUB && (DAG.getTarget().Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) && N0.hasOneUse()) { @@ -13539,8 +14042,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { } if (N1.hasOneUse()) { + // rebuildSetCC calls visitXor which may change the Chain when there is a + // STRICT_FSETCC/STRICT_FSETCCS involved. Use a handle to track changes. + HandleSDNode ChainHandle(Chain); if (SDValue NewN1 = rebuildSetCC(N1)) - return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2); + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, + ChainHandle.getValue(), NewN1, N2); } return SDValue(); @@ -13592,8 +14099,8 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { } } - // Transform br(xor(x, y)) -> br(x != y) - // Transform br(xor(xor(x,y), 1)) -> br (x == y) + // Transform (brcond (xor x, y)) -> (brcond (setcc, x, y, ne)) + // Transform (brcond (xor (xor x, y), -1)) -> (brcond (setcc, x, y, eq)) if (N.getOpcode() == ISD::XOR) { // Because we may call this on a speculatively constructed // SimplifiedSetCC Node, we need to simplify this node first. 
@@ -13617,16 +14124,17 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { if (N.getOpcode() != ISD::XOR) return N; - SDNode *TheXor = N.getNode(); - - SDValue Op0 = TheXor->getOperand(0); - SDValue Op1 = TheXor->getOperand(1); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { bool Equal = false; - if (isOneConstant(Op0) && Op0.hasOneUse() && - Op0.getOpcode() == ISD::XOR) { - TheXor = Op0.getNode(); + // (brcond (xor (xor x, y), -1)) -> (brcond (setcc x, y, eq)) + if (isBitwiseNot(N) && Op0.hasOneUse() && Op0.getOpcode() == ISD::XOR && + Op0.getValueType() == MVT::i1) { + N = Op0; + Op0 = N->getOperand(0); + Op1 = N->getOperand(1); Equal = true; } @@ -13634,7 +14142,7 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) { if (LegalTypes) SetCCVT = getSetCCResultType(SetCCVT); // Replace the uses of XOR with SETCC - return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, + return DAG.getSetCC(SDLoc(N), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); } } @@ -13994,118 +14502,142 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return true; } -/// Try to combine a load/store with a add/sub of the base pointer node into a -/// post-indexed load/store. The transformation folded the add/subtract into the -/// new indexed load/store effectively and all of its uses are redirected to the -/// new load/store. -bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { - if (Level < AfterLegalizeDAG) +static bool shouldCombineToPostInc(SDNode *N, SDValue Ptr, SDNode *PtrUse, + SDValue &BasePtr, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG, + const TargetLowering &TLI) { + if (PtrUse == N || + (PtrUse->getOpcode() != ISD::ADD && PtrUse->getOpcode() != ISD::SUB)) return false; - bool IsLoad = true; - bool IsMasked = false; - SDValue Ptr; - if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, IsMasked, - Ptr, TLI)) + if (!TLI.getPostIndexedAddressParts(N, PtrUse, BasePtr, Offset, AM, DAG)) return false; - if (Ptr.getNode()->hasOneUse()) + // Don't create a indexed load / store with zero offset. + if (isNullConstant(Offset)) return false; - for (SDNode *Op : Ptr.getNode()->uses()) { - if (Op == N || - (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) - continue; + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + return false; - SDValue BasePtr; - SDValue Offset; - ISD::MemIndexedMode AM = ISD::UNINDEXED; - if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { - // Don't create a indexed load / store with zero offset. - if (isNullConstant(Offset)) - continue; + SmallPtrSet<const SDNode *, 32> Visited; + for (SDNode *Use : BasePtr.getNode()->uses()) { + if (Use == Ptr.getNode()) + continue; - // Try turning it into a post-indexed load / store except when - // 1) All uses are load / store ops that use it as base ptr (and - // it may be folded as addressing mmode). - // 2) Op must be independent of N, i.e. Op is neither a predecessor - // nor a successor of N. Otherwise, if Op is folded that would - // create a cycle. + // No if there's a later user which could perform the index instead. 
+ if (isa<MemSDNode>(Use)) { + bool IsLoad = true; + bool IsMasked = false; + SDValue OtherPtr; + if (getCombineLoadStoreParts(Use, ISD::POST_INC, ISD::POST_DEC, IsLoad, + IsMasked, OtherPtr, TLI)) { + SmallVector<const SDNode *, 2> Worklist; + Worklist.push_back(Use); + if (SDNode::hasPredecessorHelper(N, Visited, Worklist)) + return false; + } + } - if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) - continue; + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { + for (SDNode *UseUse : Use->uses()) + if (canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + return false; + } + } + return true; +} - // Check for #1. - bool TryNext = false; - for (SDNode *Use : BasePtr.getNode()->uses()) { - if (Use == Ptr.getNode()) - continue; +static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, + bool &IsMasked, SDValue &Ptr, + SDValue &BasePtr, SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG, + const TargetLowering &TLI) { + if (!getCombineLoadStoreParts(N, ISD::POST_INC, ISD::POST_DEC, IsLoad, + IsMasked, Ptr, TLI) || + Ptr.getNode()->hasOneUse()) + return nullptr; + + // Try turning it into a post-indexed load / store except when + // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded as addressing mmode). + // 2) Op must be independent of N, i.e. Op is neither a predecessor + // nor a successor of N. Otherwise, if Op is folded that would + // create a cycle. + for (SDNode *Op : Ptr->uses()) { + // Check for #1. + if (!shouldCombineToPostInc(N, Ptr, Op, BasePtr, Offset, AM, DAG, TLI)) + continue; - // If all the uses are load / store addresses, then don't do the - // transformation. - if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB) { - bool RealUse = false; - for (SDNode *UseUse : Use->uses()) { - if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) - RealUse = true; - } + // Check for #2. + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 8> Worklist; + // Ptr is predecessor to both N and Op. + Visited.insert(Ptr.getNode()); + Worklist.push_back(N); + Worklist.push_back(Op); + if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && + !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) + return Op; + } + return nullptr; +} - if (!RealUse) { - TryNext = true; - break; - } - } - } +/// Try to combine a load/store with a add/sub of the base pointer node into a +/// post-indexed load/store. The transformation folded the add/subtract into the +/// new indexed load/store effectively and all of its uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; - if (TryNext) - continue; + bool IsLoad = true; + bool IsMasked = false; + SDValue Ptr; + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + SDNode *Op = getPostIndexedLoadStoreOp(N, IsLoad, IsMasked, Ptr, BasePtr, + Offset, AM, DAG, TLI); + if (!Op) + return false; - // Check for #2. - SmallPtrSet<const SDNode *, 32> Visited; - SmallVector<const SDNode *, 8> Worklist; - // Ptr is predecessor to both N and Op. - Visited.insert(Ptr.getNode()); - Worklist.push_back(N); - Worklist.push_back(Op); - if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && - !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) { - SDValue Result; - if (!IsMasked) - Result = IsLoad ? 
DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, - Offset, AM) - : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + SDValue Result; + if (!IsMasked) + Result = IsLoad ? DAG.getIndexedLoad(SDValue(N, 0), SDLoc(N), BasePtr, + Offset, AM) + : DAG.getIndexedStore(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), BasePtr, Offset, AM); - else - Result = IsLoad ? DAG.getIndexedMaskedLoad(SDValue(N, 0), SDLoc(N), - BasePtr, Offset, AM) - : DAG.getIndexedMaskedStore(SDValue(N, 0), SDLoc(N), - BasePtr, Offset, AM); - ++PostIndexedNodes; - ++NodesCombined; - LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); - dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); - dbgs() << '\n'); - WorklistRemover DeadNodes(*this); - if (IsLoad) { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); - } else { - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); - } - - // Finally, since the node is now dead, remove it from the graph. - deleteAndRecombine(N); - - // Replace the uses of Use with uses of the updated base value. - DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), - Result.getValue(IsLoad ? 1 : 0)); - deleteAndRecombine(Op); - return true; - } - } + ++PostIndexedNodes; + ++NodesCombined; + LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG); + dbgs() << "\nWith: "; Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + if (IsLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); } - return false; + // Finally, since the node is now dead, remove it from the graph. + deleteAndRecombine(N); + + // Replace the uses of Use with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(IsLoad ? 1 : 0)); + deleteAndRecombine(Op); + return true; } /// Return the base-pointer arithmetic from an indexed \p LD. @@ -14222,11 +14754,11 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { if (LD->isIndexed()) { - bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC || - LD->getAddressingMode() == ISD::POST_DEC); - unsigned Opc = IsSub ? ISD::SUB : ISD::ADD; - SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(), - LD->getOperand(1), LD->getOperand(2)); + // Cannot handle opaque target constants and we must respect the user's + // request not to split indexes from loads. + if (!canSplitIdx(LD)) + return SDValue(); + SDValue Idx = SplitIndexingFromLoad(LD); SDValue Ops[] = {Val, Idx, Chain}; return CombineTo(LD, Ops, 3); } @@ -14322,14 +14854,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // the indexing into an add/sub directly (that TargetConstant may not be // valid for a different type of node, and we cannot convert an opaque // target constant into a regular constant). 
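Editorial aside, not part of the patch: what the post-indexed combine above buys in practice. A load whose base pointer also feeds a separate ADD/SUB can be folded into a single post-indexed access (on AArch64, for example, something like "ldr w0, [x1], #4"); the canonical source shape is a pointer walk:

    int sum(const int *p, int n) {
      int s = 0;
      for (int i = 0; i < n; ++i)
        s += *p++;  // load from p, then advance p: a post-indexed load candidate
      return s;
    }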
- bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant && - cast<ConstantSDNode>(LD->getOperand(2))->isOpaque(); + bool CanSplitIdx = canSplitIdx(LD); - if (!N->hasAnyUseOfValue(0) && - ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) { + if (!N->hasAnyUseOfValue(0) && (CanSplitIdx || !N->hasAnyUseOfValue(1))) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); SDValue Index; - if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) { + if (N->hasAnyUseOfValue(1) && CanSplitIdx) { Index = SplitIndexingFromLoad(LD); // Try to fold the base pointer arithmetic into subsequent loads and // stores. @@ -14356,11 +14886,12 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) { - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) { + if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { + if (*Alignment > LD->getAlign() && + isAligned(*Alignment, LD->getSrcValueOffset())) { SDValue NewLoad = DAG.getExtLoad( LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), LD->getMemoryVT(), Align, + LD->getPointerInfo(), LD->getMemoryVT(), *Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); // NewLoad will always be N as we are only refining the alignment assert(NewLoad.getNode() == N); @@ -14557,11 +15088,11 @@ struct LoadedSlice { } /// Get the alignment of the load used for this slice. - unsigned getAlignment() const { - unsigned Alignment = Origin->getAlignment(); + Align getAlign() const { + Align Alignment = Origin->getAlign(); uint64_t Offset = getOffsetFromBase(); if (Offset != 0) - Alignment = MinAlign(Alignment, Alignment + Offset); + Alignment = commonAlignment(Alignment, Alignment.value() + Offset); return Alignment; } @@ -14657,8 +15188,8 @@ struct LoadedSlice { // Create the load for the slice. SDValue LastInst = DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, - Origin->getPointerInfo().getWithOffset(Offset), - getAlignment(), Origin->getMemOperand()->getFlags()); + Origin->getPointerInfo().getWithOffset(Offset), getAlign(), + Origin->getMemOperand()->getFlags()); // If the final type is not the same as the loaded type, this means that // we have to pad with zero. Create a zero extend for that. EVT FinalType = Inst->getValueType(0); @@ -14699,10 +15230,10 @@ struct LoadedSlice { // Check if it will be merged with the load. // 1. Check the alignment constraint. - unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( + Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign( ResVT.getTypeForEVT(*DAG->getContext())); - if (RequiredAlignment > getAlignment()) + if (RequiredAlignment > getAlign()) return false; // 2. Check that the load is a legal operation for that type. @@ -14788,14 +15319,14 @@ static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, continue; // Check if the target supplies paired loads for this type. - unsigned RequiredAlignment = 0; + Align RequiredAlignment; if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { // move to the next pair, this type is hopeless. Second = nullptr; continue; } // Check if we meet the alignment requirement. - if (RequiredAlignment > First->getAlignment()) + if (First->getAlign() < RequiredAlignment) continue; // Check that both loads are next to each other in memory. 
@@ -14868,6 +15399,12 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { !LD->getValueType(0).isInteger()) return false; + // The algorithm to split up a load of a scalable vector into individual + // elements currently requires knowing the length of the loaded type, + // so will need adjusting to work on scalable vectors. + if (LD->getValueType(0).isScalableVector()) + return false; + // Keep track of already used bits to detect overlapping values. // In that case, we will just abort the transformation. APInt UsedBits(LD->getValueSizeInBits(0), 0); @@ -15112,7 +15649,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { // Y is known to provide just those bytes. If so, we try to replace the // load + replace + store sequence with a single (narrower) store, which makes // the load dead. - if (Opc == ISD::OR) { + if (Opc == ISD::OR && EnableShrinkLoadReplaceStoreWithStore) { std::pair<unsigned, unsigned> MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) @@ -15128,6 +15665,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { return NewST; } + if (!EnableReduceLoadOpStoreWidth) + return SDValue(); + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || Value.getOperand(1).getOpcode() != ISD::Constant) return SDValue(); @@ -15181,9 +15721,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; - unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) + if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy)) return SDValue(); SDValue NewPtr = DAG.getMemBasePlusOffset(Ptr, PtrOff, SDLoc(LD)); @@ -15229,17 +15769,24 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { ST->getPointerInfo().getAddrSpace() != 0) return SDValue(); - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + TypeSize VTSize = VT.getSizeInBits(); + + // We don't know the size of scalable types at compile time so we cannot + // create an integer of the equivalent size. + if (VTSize.isScalable()) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) return SDValue(); - unsigned LDAlign = LD->getAlignment(); - unsigned STAlign = ST->getAlignment(); + Align LDAlign = LD->getAlign(); + Align STAlign = ST->getAlign(); Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); + Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy); if (LDAlign < ABIAlign || STAlign < ABIAlign) return SDValue(); @@ -15356,7 +15903,7 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes, return DAG.getTokenFactor(StoreDL, Chains); } -bool DAGCombiner::MergeStoresOfConstantsOrVecElts( +bool DAGCombiner::mergeStoresOfConstantsOrVecElts( SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores, bool IsConstantSrc, bool UseVector, bool UseTrunc) { // Make sure we have something to merge. 
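ReduceLoadOpStoreWidth, now gated behind the new EnableReduceLoadOpStoreWidth option, narrows a load/op/store sequence so the store only touches the bytes that actually change; the subtle step is turning the bit position of the modified field into a byte offset, which is mirrored on big-endian targets. A hedged standalone sketch of that offset computation, with illustrative names rather than the combiner's:

#include <cstdint>
#include <iostream>

// Byte offset of a NewBW-bit field starting at bit ShAmt inside a
// BitWidth-bit in-memory value.  Big-endian stores the most significant byte
// first, so the little-endian offset is mirrored across the value.
uint64_t narrowedByteOffset(uint64_t ShAmt, uint64_t NewBW, uint64_t BitWidth,
                            bool BigEndian) {
  uint64_t PtrOff = ShAmt / 8;                    // little-endian byte offset
  if (BigEndian)
    PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; // mirror for big-endian
  return PtrOff;
}

int main() {
  // The low 8 bits of an i32 live at byte 0 on LE but byte 3 on BE.
  std::cout << narrowedByteOffset(0, 8, 32, false) << '\n'; // 0
  std::cout << narrowedByteOffset(0, 8, 32, true) << '\n';  // 3
}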
@@ -15530,14 +16077,12 @@ void DAGCombiner::getStoreMergeCandidates( if (BasePtr.getBase().isUndef()) return; - bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val); - bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Val.getOpcode() == ISD::EXTRACT_SUBVECTOR); - bool IsLoadSrc = isa<LoadSDNode>(Val); + StoreSource StoreSrc = getStoreSource(Val); + assert(StoreSrc != StoreSource::Unknown && "Expected known source for store"); BaseIndexOffset LBasePtr; // Match on loadbaseptr if relevant. EVT LoadVT; - if (IsLoadSrc) { + if (StoreSrc == StoreSource::Load) { auto *Ld = cast<LoadSDNode>(Val); LBasePtr = BaseIndexOffset::match(Ld, DAG); LoadVT = Ld->getMemoryVT(); @@ -15565,7 +16110,7 @@ void DAGCombiner::getStoreMergeCandidates( // Allow merging constants of different types as integers. bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT()) : Other->getMemoryVT() != MemVT; - if (IsLoadSrc) { + if (StoreSrc == StoreSource::Load) { if (NoTypeMatch) return false; // The Load's Base Ptr must also match @@ -15589,13 +16134,13 @@ void DAGCombiner::getStoreMergeCandidates( } else return false; } - if (IsConstantSrc) { + if (StoreSrc == StoreSource::Constant) { if (NoTypeMatch) return false; if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC))) return false; } - if (IsExtractVecSrc) { + if (StoreSrc == StoreSource::Extract) { // Do not merge truncated stores here. if (Other->isTruncatingStore()) return false; @@ -15736,77 +16281,22 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( return true; } -bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) - return false; - - EVT MemVT = St->getMemoryVT(); - int64_t ElementSizeBytes = MemVT.getStoreSize(); - unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; - - if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) - return false; - - bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute( - Attribute::NoImplicitFloat); - - // This function cannot currently deal with non-byte-sized memory sizes. - if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) - return false; - - if (!MemVT.isSimple()) - return false; - - // Perform an early exit check. Do not bother looking at stored values that - // are not constants, loads, or extracted vector elements. - SDValue StoredVal = peekThroughBitcasts(St->getValue()); - bool IsLoadSrc = isa<LoadSDNode>(StoredVal); - bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || - isa<ConstantFPSDNode>(StoredVal); - bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || - StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); - bool IsNonTemporalStore = St->isNonTemporal(); - bool IsNonTemporalLoad = - IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal(); - - if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) - return false; - - SmallVector<MemOpLink, 8> StoreNodes; - SDNode *RootNode; - // Find potential store merge candidates by searching through chain sub-DAG - getStoreMergeCandidates(St, StoreNodes, RootNode); - - // Check if there is anything to merge. - if (StoreNodes.size() < 2) - return false; - - // Sort the memory operands according to their distance from the - // base pointer. - llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { - return LHS.OffsetFromBase < RHS.OffsetFromBase; - }); - - // Store Merge attempts to merge the lowest stores. 
This generally - // works out as if successful, as the remaining stores are checked - // after the first collection of stores is merged. However, in the - // case that a non-mergeable store is found first, e.g., {p[-2], - // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent - // mergeable cases. To prevent this, we prune such stores from the - // front of StoreNodes here. - - bool RV = false; - while (StoreNodes.size() > 1) { +unsigned +DAGCombiner::getConsecutiveStores(SmallVectorImpl<MemOpLink> &StoreNodes, + int64_t ElementSizeBytes) const { + while (true) { + // Find a store past the width of the first store. size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != - StoreNodes[StartIdx + 1].OffsetFromBase) + StoreNodes[StartIdx + 1].OffsetFromBase) ++StartIdx; // Bail if we don't have enough candidates to merge. if (StartIdx + 1 >= StoreNodes.size()) - return RV; + return 0; + // Trim stores that overlapped with the first store. if (StartIdx) StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx); @@ -15822,302 +16312,345 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { break; NumConsecutiveStores = i + 1; } + if (NumConsecutiveStores > 1) + return NumConsecutiveStores; - if (NumConsecutiveStores < 2) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumConsecutiveStores); - continue; - } - - // The node with the lowest store address. - LLVMContext &Context = *DAG.getContext(); - const DataLayout &DL = DAG.getDataLayout(); - - // Store the constants into memory as one consecutive store. - if (IsConstantSrc) { - while (NumConsecutiveStores >= 2) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned LastLegalType = 1; - unsigned LastLegalVectorType = 1; - bool LastIntegerTrunc = false; - bool NonZero = false; - unsigned FirstZeroAfterNonZero = NumConsecutiveStores; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue StoredVal = ST->getValue(); - bool IsElementZero = false; - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) - IsElementZero = C->isNullValue(); - else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) - IsElementZero = C->getConstantFPValue()->isNullValue(); - if (IsElementZero) { - if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) - FirstZeroAfterNonZero = i; - } - NonZero |= !IsElementZero; - - // Find a legal type for the constant store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; + // There are no consecutive stores at the start of the list. + // Remove the first store and try again. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 1); + } +} - // Break early when size is too large to be legal. - if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) - break; +bool DAGCombiner::tryStoreMergeOfConstants( + SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, + EVT MemVT, SDNode *RootNode, bool AllowVectors) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? 
MemVT.getVectorNumElements() : 1; + bool MadeChange = false; + + // Store the constants into memory as one consecutive store. + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned LastLegalType = 1; + unsigned LastLegalVectorType = 1; + bool LastIntegerTrunc = false; + bool NonZero = false; + unsigned FirstZeroAfterNonZero = NumConsecutiveStores; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = ST->getValue(); + bool IsElementZero = false; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) + IsElementZero = C->isNullValue(); + else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) + IsElementZero = C->getConstantFPValue()->isNullValue(); + if (IsElementZero) { + if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) + FirstZeroAfterNonZero = i; + } + NonZero |= !IsElementZero; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), &IsFast) && - IsFast) { - LastIntegerTrunc = false; - LastLegalType = i + 1; - // Or check whether a truncstore is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValTy = - TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); - if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), - &IsFast) && - IsFast) { - LastIntegerTrunc = true; - LastLegalType = i + 1; - } - } + // Find a legal type for the constant store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast = false; - // We only use vectors if the constant is known to be zero or the - // target allows it and the function is not marked with the - // noimplicitfloat attribute. - if ((!NonZero || - TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && - !NoVectors) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess( - Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && - IsFast) - LastLegalVectorType = i + 1; - } - } + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; - bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; - unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; - - // Check if we found a legal integer type that creates a meaningful - // merge. - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved or we've dropped a non-zero value. Drop as many - // candidates as we can here. 
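The constant-store loop above grows the candidate run one store at a time and records the last run length whose combined width is still usable (separately for integer and vector types), breaking early once the width exceeds the legal maximum. A simplified, self-contained sketch of that scan; lastLegalRun and the power-of-two predicate are stand-ins for the real target hooks, not LLVM APIs:

#include <iostream>

// Grow the run one store at a time; remember the last length whose merged
// width passes the (stand-in) legality check, and stop once it is too wide
// to ever be legal.
unsigned lastLegalRun(unsigned NumConsecutiveStores, unsigned ElementSizeBytes,
                      unsigned MaximumLegalStoreInBits,
                      bool (*isLegalStoreWidth)(unsigned Bits)) {
  unsigned LastLegal = 1;
  for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
    unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
    if (SizeInBits > MaximumLegalStoreInBits)
      break;
    if (isLegalStoreWidth(SizeInBits))
      LastLegal = i + 1;
  }
  return LastLegal;
}

static bool isPow2UpTo64(unsigned Bits) {
  return Bits <= 64 && (Bits & (Bits - 1)) == 0;
}

int main() {
  std::cout << lastLegalRun(8, 1, 64, isPow2UpTo64) << '\n'; // 8 x i8 -> i64
  std::cout << lastLegalRun(6, 1, 64, isPow2UpTo64) << '\n'; // best run is 4 (i32)
}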
- unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (NumSkip < FirstZeroAfterNonZero) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = false; + LastLegalType = i + 1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) { + LastIntegerTrunc = true; + LastLegalType = i + 1; } + } - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - continue; - } + // We only use vectors if the constant is known to be zero or the + // target allows it and the function is not marked with the + // noimplicitfloat attribute. + if ((!NonZero || + TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) && + AllowVectors) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + LastLegalVectorType = i + 1; + } + } - RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true, - UseVector, LastIntegerTrunc); + bool UseVector = (LastLegalVectorType > LastLegalType) && AllowVectors; + unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType; + + // Check if we found a legal integer type that creates a meaningful + // merge. + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved or we've dropped a non-zero value. Drop as many + // candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (NumSkip < FirstZeroAfterNonZero) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Remove merged stores for next iteration. - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - } + // Check that we can merge these candidates without causing a cycle. 
+ if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; continue; } - // When extracting multiple vector elements, try to store them - // in one vector store rather than a sequence of scalar stores. - if (IsExtractVecSrc) { - // Loop on Consecutive Stores on success. - while (NumConsecutiveStores >= 2) { - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - unsigned NumStoresToMerge = 1; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT Ty = - EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast; - - // Break early when size is too large to be legal. - if (Ty.getSizeInBits() > MaximumLegalStoreInBits) - break; + MadeChange |= mergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumElem, true, UseVector, LastIntegerTrunc); - if (TLI.isTypeLegal(Ty) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && - TLI.allowsMemoryAccess(Context, DL, Ty, - *FirstInChain->getMemOperand(), &IsFast) && - IsFast) - NumStoresToMerge = i + 1; - } + // Remove merged stores for next iteration. + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + return MadeChange; +} - // Check if we found a legal integer type creating a meaningful - // merge. - if (NumStoresToMerge < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have, is if the alignment has - // improved. Drop as many candidates as we can here. - unsigned NumSkip = 1; - while ( - (NumSkip < NumConsecutiveStores) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; - } +bool DAGCombiner::tryStoreMergeOfExtracts( + SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumConsecutiveStores, + EVT MemVT, SDNode *RootNode) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; + + // Loop on Consecutive Stores on success. + while (NumConsecutiveStores >= 2) { + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + unsigned NumStoresToMerge = 1; + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast = false; - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies( - StoreNodes, NumStoresToMerge, RootNode)) { - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - NumConsecutiveStores -= NumStoresToMerge; - continue; - } + // Break early when size is too large to be legal. 
+ if (Ty.getSizeInBits() > MaximumLegalStoreInBits) + break; - RV |= MergeStoresOfConstantsOrVecElts( - StoreNodes, MemVT, NumStoresToMerge, false, true, false); + if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.allowsMemoryAccess(Context, DL, Ty, + *FirstInChain->getMemOperand(), &IsFast) && + IsFast) + NumStoresToMerge = i + 1; + } + + // Check if we found a legal integer type creating a meaningful + // merge. + if (NumStoresToMerge < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have, is if the alignment has + // improved. Drop as many candidates as we can here. + unsigned NumSkip = 1; + while ((NumSkip < NumConsecutiveStores) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - StoreNodes.erase(StoreNodes.begin(), - StoreNodes.begin() + NumStoresToMerge); - NumConsecutiveStores -= NumStoresToMerge; - } + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumStoresToMerge, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), + StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; continue; } - // Below we handle the case of multiple consecutive stores that - // come from multiple consecutive loads. We merge them into a single - // wide load and a single wide store. + MadeChange |= mergeStoresOfConstantsOrVecElts( + StoreNodes, MemVT, NumStoresToMerge, false, true, false); + + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumStoresToMerge); + NumConsecutiveStores -= NumStoresToMerge; + } + return MadeChange; +} + +bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, + unsigned NumConsecutiveStores, EVT MemVT, + SDNode *RootNode, bool AllowVectors, + bool IsNonTemporalStore, + bool IsNonTemporalLoad) { + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + int64_t ElementSizeBytes = MemVT.getStoreSize(); + unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + bool MadeChange = false; - // Look for load nodes which are used by the stored values. - SmallVector<MemOpLink, 8> LoadNodes; + int64_t StartAddress = StoreNodes[0].OffsetFromBase; - // Find acceptable loads. Loads need to have the same chain (token factor), - // must not be zext, volatile, indexed, and they must be consecutive. - BaseIndexOffset LdBasePtr; + // Look for load nodes which are used by the stored values. + SmallVector<MemOpLink, 8> LoadNodes; - for (unsigned i = 0; i < NumConsecutiveStores; ++i) { - StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); - SDValue Val = peekThroughBitcasts(St->getValue()); - LoadSDNode *Ld = cast<LoadSDNode>(Val); - - BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); - // If this is not the first ptr that we check. - int64_t LdOffset = 0; - if (LdBasePtr.getBase().getNode()) { - // The base ptr must be the same. - if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) - break; - } else { - // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr; - } + // Find acceptable loads. 
Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. + BaseIndexOffset LdBasePtr; + + for (unsigned i = 0; i < NumConsecutiveStores; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = peekThroughBitcasts(St->getValue()); + LoadSDNode *Ld = cast<LoadSDNode>(Val); - // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG); + // If this is not the first ptr that we check. + int64_t LdOffset = 0; + if (LdBasePtr.getBase().getNode()) { + // The base ptr must be the same. + if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset)) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr; } - while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdOffset)); + } + + while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) { + Align RequiredAlignment; + bool NeedRotate = false; + if (LoadNodes.size() == 2) { // If we have load/store pair instructions and we only have two values, // don't bother merging. - unsigned RequiredAlignment; - if (LoadNodes.size() == 2 && - TLI.hasPairedLoad(MemVT, RequiredAlignment) && - StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) { + if (TLI.hasPairedLoad(MemVT, RequiredAlignment) && + StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) { StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2); LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2); break; } - LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; - unsigned FirstStoreAS = FirstInChain->getAddressSpace(); - unsigned FirstStoreAlign = FirstInChain->getAlignment(); - LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); - unsigned FirstLoadAlign = FirstLoad->getAlignment(); - - // Scan the memory operations on the chain and find the first - // non-consecutive load memory address. These variables hold the index in - // the store node array. - - unsigned LastConsecutiveLoad = 1; - - // This variable refers to the size and not index in the array. - unsigned LastLegalVectorType = 1; - unsigned LastLegalIntegerType = 1; - bool isDereferenceable = true; - bool DoIntegerTruncate = false; - StartAddress = LoadNodes[0].OffsetFromBase; - SDValue FirstChain = FirstLoad->getChain(); - for (unsigned i = 1; i < LoadNodes.size(); ++i) { - // All loads must share the same chain. - if (LoadNodes[i].MemNode->getChain() != FirstChain) - break; + // If the loads are reversed, see if we can rotate the halves into place. + int64_t Offset0 = LoadNodes[0].OffsetFromBase; + int64_t Offset1 = LoadNodes[1].OffsetFromBase; + EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2); + if (Offset0 - Offset1 == ElementSizeBytes && + (hasOperation(ISD::ROTL, PairVT) || + hasOperation(ISD::ROTR, PairVT))) { + std::swap(LoadNodes[0], LoadNodes[1]); + NeedRotate = true; + } + } + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); - int64_t CurrAddress = LoadNodes[i].OffsetFromBase; - if (CurrAddress - StartAddress != (ElementSizeBytes * i)) - break; - LastConsecutiveLoad = i; + // Scan the memory operations on the chain and find the first + // non-consecutive load memory address. 
These variables hold the index in + // the store node array. + + unsigned LastConsecutiveLoad = 1; + + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 1; + unsigned LastLegalIntegerType = 1; + bool isDereferenceable = true; + bool DoIntegerTruncate = false; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue LoadChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. + if (LoadNodes[i].MemNode->getChain() != LoadChain) + break; - if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) - isDereferenceable = false; + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; - // Find a legal type for the vector store. - unsigned Elts = (i + 1) * NumMemElts; - EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable()) + isDereferenceable = false; - // Break early when size is too large to be legal. - if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) - break; + // Find a legal type for the vector store. + unsigned Elts = (i + 1) * NumMemElts; + EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - bool IsFastSt, IsFastLd; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstLoad->getMemOperand(), &IsFastLd) && - IsFastLd) { - LastLegalVectorType = i + 1; - } + // Break early when size is too large to be legal. + if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) + break; - // Find a legal type for the integer store. - unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; - StoreTy = EVT::getIntegerVT(Context, SizeInBits); - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + bool IsFastSt = false; + bool IsFastLd = false; + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && + IsFastLd) { + LastLegalVectorType = i + 1; + } + + // Find a legal type for the integer store. + unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstInChain->getMemOperand(), &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, + *FirstLoad->getMemOperand(), &IsFastLd) && + IsFastLd) { + LastLegalIntegerType = i + 1; + DoIntegerTruncate = false; + // Or check whether a truncstore and extload is legal. 
+ } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -16125,149 +16658,225 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { *FirstLoad->getMemOperand(), &IsFastLd) && IsFastLd) { LastLegalIntegerType = i + 1; - DoIntegerTruncate = false; - // Or check whether a truncstore and extload is legal. - } else if (TLI.getTypeAction(Context, StoreTy) == - TargetLowering::TypePromoteInteger) { - EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); - if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && - TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, - StoreTy) && - TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstInChain->getMemOperand(), - &IsFastSt) && - IsFastSt && - TLI.allowsMemoryAccess(Context, DL, StoreTy, - *FirstLoad->getMemOperand(), &IsFastLd) && - IsFastLd) { - LastLegalIntegerType = i + 1; - DoIntegerTruncate = true; - } + DoIntegerTruncate = true; } } + } - // Only use vector types if the vector type is larger than the integer - // type. If they are the same, use integers. - bool UseVectorTy = - LastLegalVectorType > LastLegalIntegerType && !NoVectors; - unsigned LastLegalType = - std::max(LastLegalVectorType, LastLegalIntegerType); - - // We add +1 here because the LastXXX variables refer to location while - // the NumElem refers to array/index size. - unsigned NumElem = - std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); - NumElem = std::min(LastLegalType, NumElem); - - if (NumElem < 2) { - // We know that candidate stores are in order and of correct - // shape. While there is no mergeable sequence from the - // beginning one may start later in the sequence. The only - // reason a merge of size N could have failed where another of - // the same size would not have is if the alignment or either - // the load or store has improved. Drop as many candidates as we - // can here. - unsigned NumSkip = 1; - while ((NumSkip < LoadNodes.size()) && - (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && - (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) - NumSkip++; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); - NumConsecutiveStores -= NumSkip; - continue; - } + // Only use vector types if the vector type is larger than the integer + // type. If they are the same, use integers. + bool UseVectorTy = + LastLegalVectorType > LastLegalIntegerType && AllowVectors; + unsigned LastLegalType = + std::max(LastLegalVectorType, LastLegalIntegerType); + + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. 
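When exactly two loads feed the merged store but sit in reversed order, the code earlier in this hunk records NeedRotate and, further below, fixes up the single wide load with a rotate by half its width (the diff notes the target can convert it to the equivalent ROTR if ROTL is unavailable). A plain C++ check of the identity being relied on, unrelated to any DAG types:

#include <cstdint>
#include <iostream>

// Rotating a concatenated pair of halves by half the combined width swaps
// the halves, so a reversed pair of adjacent loads can still become one wide
// load plus a rotate.
uint32_t rotl32(uint32_t V, unsigned N) {
  N &= 31;
  return (V << N) | (V >> ((32 - N) & 31));
}

int main() {
  uint16_t Lo = 0x1111, Hi = 0x2222;
  uint32_t Pair = (uint32_t(Hi) << 16) | Lo;  // halves in the wrong order
  uint32_t Fixed = rotl32(Pair, 16);          // one rotate swaps them
  std::cout << std::hex << Pair << ' ' << Fixed << '\n'; // 22221111 11112222
}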
+ unsigned NumElem = std::min(NumConsecutiveStores, LastConsecutiveLoad + 1); + NumElem = std::min(LastLegalType, NumElem); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + + if (NumElem < 2) { + // We know that candidate stores are in order and of correct + // shape. While there is no mergeable sequence from the + // beginning one may start later in the sequence. The only + // reason a merge of size N could have failed where another of + // the same size would not have is if the alignment or either + // the load or store has improved. Drop as many candidates as we + // can here. + unsigned NumSkip = 1; + while ((NumSkip < LoadNodes.size()) && + (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) && + (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign)) + NumSkip++; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip); + NumConsecutiveStores -= NumSkip; + continue; + } - // Check that we can merge these candidates without causing a cycle. - if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, - RootNode)) { - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; - continue; - } + // Check that we can merge these candidates without causing a cycle. + if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem, + RootNode)) { + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + continue; + } - // Find if it is better to use vectors or integers to load and store - // to memory. - EVT JointMemOpVT; - if (UseVectorTy) { - // Find a legal type for the vector store. - unsigned Elts = NumElem * NumMemElts; - JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); - } else { - unsigned SizeInBits = NumElem * ElementSizeBytes * 8; - JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + // Find a legal type for the vector store. + unsigned Elts = NumElem * NumMemElts; + JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); + } else { + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + } + + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); + + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. + + SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); + AddToWorklist(NewStoreChain.getNode()); + + MachineMemOperand::Flags LdMMOFlags = + isDereferenceable ? MachineMemOperand::MODereferenceable + : MachineMemOperand::MONone; + if (IsNonTemporalLoad) + LdMMOFlags |= MachineMemOperand::MONonTemporal; + + MachineMemOperand::Flags StMMOFlags = IsNonTemporalStore + ? 
MachineMemOperand::MONonTemporal + : MachineMemOperand::MONone; + + SDValue NewLoad, NewStore; + if (UseVectorTy || !DoIntegerTruncate) { + NewLoad = DAG.getLoad( + JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags); + SDValue StoreOp = NewLoad; + if (NeedRotate) { + unsigned LoadWidth = ElementSizeBytes * 8 * 2; + assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) && + "Unexpected type for rotate-able load pair"); + SDValue RotAmt = + DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL); + // Target can convert to the identical ROTR if it does not have ROTL. + StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt); } + NewStore = DAG.getStore( + NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); + } else { // This must be the truncstore/extload case + EVT ExtendedTy = + TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); + NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, + FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), JointMemOpVT, + FirstLoadAlign, LdMMOFlags); + NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), JointMemOpVT, + FirstInChain->getAlignment(), + FirstInChain->getMemOperand()->getFlags()); + } + + // Transfer chain users from old loads to the new load. + for (unsigned i = 0; i < NumElem; ++i) { + LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + } + + // Replace all stores with the new store. Recursively remove corresponding + // values if they are no longer used. + for (unsigned i = 0; i < NumElem; ++i) { + SDValue Val = StoreNodes[i].MemNode->getOperand(1); + CombineTo(StoreNodes[i].MemNode, NewStore); + if (Val.getNode()->use_empty()) + recursivelyDeleteUnusedNodes(Val.getNode()); + } + + MadeChange = true; + StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); + LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); + NumConsecutiveStores -= NumElem; + } + return MadeChange; +} + +bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) + return false; - SDLoc LoadDL(LoadNodes[0].MemNode); - SDLoc StoreDL(StoreNodes[0].MemNode); - - // The merged loads are required to have the same incoming chain, so - // using the first's chain is acceptable. - - SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem); - AddToWorklist(NewStoreChain.getNode()); - - MachineMemOperand::Flags LdMMOFlags = - isDereferenceable ? MachineMemOperand::MODereferenceable - : MachineMemOperand::MONone; - if (IsNonTemporalLoad) - LdMMOFlags |= MachineMemOperand::MONonTemporal; - - MachineMemOperand::Flags StMMOFlags = - IsNonTemporalStore ? 
MachineMemOperand::MONonTemporal - : MachineMemOperand::MONone; - - SDValue NewLoad, NewStore; - if (UseVectorTy || !DoIntegerTruncate) { - NewLoad = - DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(), - FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(), - FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getStore( - NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags); - } else { // This must be the truncstore/extload case - EVT ExtendedTy = - TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT); - NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy, - FirstLoad->getChain(), FirstLoad->getBasePtr(), - FirstLoad->getPointerInfo(), JointMemOpVT, - FirstLoadAlign, LdMMOFlags); - NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad, - FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), - JointMemOpVT, FirstInChain->getAlignment(), - FirstInChain->getMemOperand()->getFlags()); - } + // TODO: Extend this function to merge stores of scalable vectors. + // (i.e. two <vscale x 8 x i8> stores can be merged to one <vscale x 16 x i8> + // store since we know <vscale x 16 x i8> is exactly twice as large as + // <vscale x 8 x i8>). Until then, bail out for scalable vectors. + EVT MemVT = St->getMemoryVT(); + if (MemVT.isScalableVector()) + return false; + if (!MemVT.isSimple() || MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits) + return false; - // Transfer chain users from old loads to the new load. - for (unsigned i = 0; i < NumElem; ++i) { - LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); - DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), - SDValue(NewLoad.getNode(), 1)); - } + // This function cannot currently deal with non-byte-sized memory sizes. + int64_t ElementSizeBytes = MemVT.getStoreSize(); + if (ElementSizeBytes * 8 != (int64_t)MemVT.getSizeInBits()) + return false; - // Replace the all stores with the new store. Recursively remove - // corresponding value if its no longer used. - for (unsigned i = 0; i < NumElem; ++i) { - SDValue Val = StoreNodes[i].MemNode->getOperand(1); - CombineTo(StoreNodes[i].MemNode, NewStore); - if (Val.getNode()->use_empty()) - recursivelyDeleteUnusedNodes(Val.getNode()); - } + // Do not bother looking at stored values that are not constants, loads, or + // extracted vector elements. + SDValue StoredVal = peekThroughBitcasts(St->getValue()); + const StoreSource StoreSrc = getStoreSource(StoredVal); + if (StoreSrc == StoreSource::Unknown) + return false; - RV = true; - StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem); - LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem); - NumConsecutiveStores -= NumElem; + SmallVector<MemOpLink, 8> StoreNodes; + SDNode *RootNode; + // Find potential store merge candidates by searching through chain sub-DAG + getStoreMergeCandidates(St, StoreNodes, RootNode); + + // Check if there is anything to merge. + if (StoreNodes.size() < 2) + return false; + + // Sort the memory operands according to their distance from the + // base pointer. + llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase; + }); + + bool AllowVectors = !DAG.getMachineFunction().getFunction().hasFnAttribute( + Attribute::NoImplicitFloat); + bool IsNonTemporalStore = St->isNonTemporal(); + bool IsNonTemporalLoad = StoreSrc == StoreSource::Load && + cast<LoadSDNode>(StoredVal)->isNonTemporal(); + + // Store Merge attempts to merge the lowest stores. 
This generally + // works out as if successful, as the remaining stores are checked + // after the first collection of stores is merged. However, in the + // case that a non-mergeable store is found first, e.g., {p[-2], + // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent + // mergeable cases. To prevent this, we prune such stores from the + // front of StoreNodes here. + bool MadeChange = false; + while (StoreNodes.size() > 1) { + unsigned NumConsecutiveStores = + getConsecutiveStores(StoreNodes, ElementSizeBytes); + // There are no more stores in the list to examine. + if (NumConsecutiveStores == 0) + return MadeChange; + + // We have at least 2 consecutive stores. Try to merge them. + assert(NumConsecutiveStores >= 2 && "Expected at least 2 stores"); + switch (StoreSrc) { + case StoreSource::Constant: + MadeChange |= tryStoreMergeOfConstants(StoreNodes, NumConsecutiveStores, + MemVT, RootNode, AllowVectors); + break; + + case StoreSource::Extract: + MadeChange |= tryStoreMergeOfExtracts(StoreNodes, NumConsecutiveStores, + MemVT, RootNode); + break; + + case StoreSource::Load: + MadeChange |= tryStoreMergeOfLoads(StoreNodes, NumConsecutiveStores, + MemVT, RootNode, AllowVectors, + IsNonTemporalStore, IsNonTemporalLoad); + break; + + default: + llvm_unreachable("Unhandled store source type"); } } - return RV; + return MadeChange; } SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { @@ -16408,11 +17017,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Try to infer better alignment information than the store already has. if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) { - if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) { + if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { + if (*Alignment > ST->getAlign() && + isAligned(*Alignment, ST->getSrcValueOffset())) { SDValue NewStore = DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(), - ST->getMemoryVT(), Align, + ST->getMemoryVT(), *Alignment, ST->getMemOperand()->getFlags(), ST->getAAInfo()); // NewStore will always be N as we are only refining the alignment assert(NewStore.getNode() == N); @@ -16497,7 +17107,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() && - !ST1->getBasePtr().isUndef()) { + !ST1->getBasePtr().isUndef() && + // BaseIndexOffset and the code below requires knowing the size + // of a vector, so bail out if MemoryVT is scalable. + !ST1->getMemoryVT().isScalableVector()) { const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG); unsigned STBitSize = ST->getMemoryVT().getSizeInBits(); @@ -16510,33 +17123,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { CombineTo(ST1, ST1->getChain()); return SDValue(); } - - // If ST stores to a subset of preceding store's write set, we may be - // able to fold ST's value into the preceding stored value. As we know - // the other uses of ST1's chain are unconcerned with ST, this folding - // will not affect those nodes. - int64_t BitOffset; - if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize, - BitOffset)) { - SDValue ChainValue = ST1->getValue(); - if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) { - if (auto *C = dyn_cast<ConstantSDNode>(Value)) { - APInt Val = C1->getAPIntValue(); - APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize); - // FIXME: Handle Big-endian mode. 
- if (!DAG.getDataLayout().isBigEndian()) { - Val.insertBits(InsertVal, BitOffset); - SDValue NewSDVal = - DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(), - C1->isTargetOpcode(), C1->isOpaque()); - SDNode *NewST1 = DAG.UpdateNodeOperands( - ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2), - ST1->getOperand(3)); - return CombineTo(ST, SDValue(NewST1, 0)); - } - } - } - } // End ST subset of ST1 case. } } } @@ -16559,7 +17145,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so // or until we merge the last store on the chain. - bool Changed = MergeConsecutiveStores(ST); + bool Changed = mergeConsecutiveStores(ST); if (!Changed) break; // Return N as merge only uses CombineTo and no worklist clean // up is necessary. @@ -16835,6 +17421,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { EVT SubVecVT = SubVec.getValueType(); EVT VT = DestVec.getValueType(); unsigned NumSrcElts = SubVecVT.getVectorNumElements(); + // If the source only has a single vector element, the cost of creating adding + // it to a vector is likely to exceed the cost of a insert_vector_elt. + if (NumSrcElts == 1) + return SDValue(); unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits(); unsigned NumMaskVals = ExtendRatio * NumSrcElts; @@ -16880,12 +17470,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDLoc DL(N); EVT VT = InVec.getValueType(); - unsigned NumElts = VT.getVectorNumElements(); + auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); // Insert into out-of-bounds element is undefined. - if (auto *IndexC = dyn_cast<ConstantSDNode>(EltNo)) - if (IndexC->getZExtValue() >= VT.getVectorNumElements()) - return DAG.getUNDEF(VT); + if (IndexC && VT.isFixedLengthVector() && + IndexC->getZExtValue() >= VT.getVectorNumElements()) + return DAG.getUNDEF(VT); // Remove redundant insertions: // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x @@ -16893,17 +17483,25 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1)) return InVec; - auto *IndexC = dyn_cast<ConstantSDNode>(EltNo); if (!IndexC) { // If this is variable insert to undef vector, it might be better to splat: // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... > if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { - SmallVector<SDValue, 8> Ops(NumElts, InVal); - return DAG.getBuildVector(VT, DL, Ops); + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, InVal); + else { + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal); + return DAG.getBuildVector(VT, DL, Ops); + } } return SDValue(); } + if (VT.isScalableVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + // We must know which element is being inserted for folds below here. 
unsigned Elt = IndexC->getZExtValue(); if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) @@ -16968,11 +17566,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); - unsigned Align = OriginalLoad->getAlignment(); - unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + Align Alignment = OriginalLoad->getAlign(); + Align NewAlign = DAG.getDataLayout().getABITypeAlign( VecEltVT.getTypeForEVT(*DAG.getContext())); - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + if (NewAlign > Alignment || + !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) return SDValue(); ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? @@ -16980,7 +17579,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) return SDValue(); - Align = NewAlign; + Alignment = NewAlign; SDValue NewPtr = OriginalLoad->getBasePtr(); SDValue Offset; @@ -17020,13 +17619,13 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, : ISD::EXTLOAD; Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, VecEltVT, - Align, OriginalLoad->getMemOperand()->getFlags(), + Alignment, OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, - MPI, Align, OriginalLoad->getMemOperand()->getFlags(), - OriginalLoad->getAAInfo()); + Load = DAG.getLoad( + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, Alignment, + OriginalLoad->getMemOperand()->getFlags(), OriginalLoad->getAAInfo()); Chain = Load.getValue(1); if (ResultVT.bitsLT(VecEltVT)) Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); @@ -17102,6 +17701,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // Only 0'th element of SCALAR_TO_VECTOR is defined. + if (DAG.isKnownNeverZero(Index)) + return DAG.getUNDEF(ScalarVT); + // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. @@ -17115,15 +17718,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // extract_vector_elt of out-of-bounds element -> UNDEF auto *IndexC = dyn_cast<ConstantSDNode>(Index); - unsigned NumElts = VecVT.getVectorNumElements(); - if (IndexC && IndexC->getAPIntValue().uge(NumElts)) + if (IndexC && VecVT.isFixedLengthVector() && + IndexC->getAPIntValue().uge(VecVT.getVectorNumElements())) return DAG.getUNDEF(ScalarVT); // extract_vector_elt (build_vector x, y), 1 -> y - if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR && + if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || + VecOp.getOpcode() == ISD::SPLAT_VECTOR) && TLI.isTypeLegal(VecVT) && (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) { - SDValue Elt = VecOp.getOperand(IndexC->getZExtValue()); + assert((VecOp.getOpcode() != ISD::BUILD_VECTOR || + VecVT.isFixedLengthVector()) && + "BUILD_VECTOR used for scalable vectors"); + unsigned IndexVal = + VecOp.getOpcode() == ISD::BUILD_VECTOR ? 
IndexC->getZExtValue() : 0; + SDValue Elt = VecOp.getOperand(IndexVal); EVT InEltVT = Elt.getValueType(); // Sometimes build_vector's scalar input types do not match result type. @@ -17134,6 +17743,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // converts. } + if (VecVT.isScalableVector()) + return SDValue(); + + // All the code from this point onwards assumes fixed width vectors, but it's + // possible that some of the combinations could be made to work for scalable + // vectors too. + unsigned NumElts = VecVT.getVectorNumElements(); + unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); + // TODO: These transforms should not require the 'hasOneUse' restriction, but // there are regressions on multiple targets without it. We can end up with a // mess of scalar and vector code if we reduce only part of the DAG to scalar. @@ -17157,7 +17775,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { "Extract element and scalar to vector can't change element type " "from FP to integer."); unsigned XBitWidth = X.getValueSizeInBits(); - unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1; // An extract element return value type can be wider than its vector @@ -17215,9 +17832,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // FIXME: Should really be just isOperationLegalOrCustom. TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) || TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) { - EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec, - DAG.getConstant(OrigElt, DL, IndexTy)); + DAG.getVectorIdxConstant(OrigElt, DL)); } } @@ -17241,6 +17857,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { AddToWorklist(N); return SDValue(N, 0); } + APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth); + if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) { + // We simplified the vector operand of this extract element. If this + // extract is not dead, visit it again so it is folded properly. + if (N->getOpcode() != ISD::DELETED_NODE) + AddToWorklist(N); + return SDValue(N, 0); + } } // Everything under here is trying to match an extract of a loaded value. @@ -17326,6 +17950,30 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { Elt = (Idx < (int)NumElts) ? 
Idx : Idx - (int)NumElts; Index = DAG.getConstant(Elt, DL, Index.getValueType()); } + } else if (VecOp.getOpcode() == ISD::CONCAT_VECTORS && !BCNumEltsChanged && + VecVT.getVectorElementType() == ScalarVT && + (!LegalTypes || + TLI.isTypeLegal( + VecOp.getOperand(0).getValueType().getVectorElementType()))) { + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 0 + // -> extract_vector_elt a, 0 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 1 + // -> extract_vector_elt a, 1 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 2 + // -> extract_vector_elt b, 0 + // extract_vector_elt (concat_vectors v2i16:a, v2i16:b), 3 + // -> extract_vector_elt b, 1 + SDLoc SL(N); + EVT ConcatVT = VecOp.getOperand(0).getValueType(); + unsigned ConcatNumElts = ConcatVT.getVectorNumElements(); + SDValue NewIdx = DAG.getConstant(Elt % ConcatNumElts, SL, + Index.getValueType()); + + SDValue ConcatOp = VecOp.getOperand(Elt / ConcatNumElts); + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, + ConcatVT.getVectorElementType(), + ConcatOp, NewIdx); + return DAG.getNode(ISD::BITCAST, SL, ScalarVT, Elt); } // Make sure we found a non-volatile load and the extractelement is @@ -17407,6 +18055,11 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { if (!ValidTypes) return SDValue(); + // If we already have a splat buildvector, then don't fold it if it means + // introducing zeros. + if (!AllAnyExt && DAG.isSplatValue(SDValue(N, 0), /*AllowUndefs*/ true)) + return SDValue(); + bool isLE = DAG.getDataLayout().isLittleEndian(); unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); @@ -17453,12 +18106,89 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return DAG.getBitcast(VT, BV); } +// Simplify (build_vec (trunc $1) +// (trunc (srl $1 half-width)) +// (trunc (srl $1 (2 * half-width))) …) +// to (bitcast $1) +SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { + assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); + + // Only for little endian + if (!DAG.getDataLayout().isLittleEndian()) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + EVT OutScalarTy = VT.getScalarType(); + uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits(); + + // Only for power of two types to be sure that bitcast works well + if (!isPowerOf2_64(ScalarTypeBitsize)) + return SDValue(); + + unsigned NumInScalars = N->getNumOperands(); + + // Look through bitcasts + auto PeekThroughBitcast = [](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST) + return Op.getOperand(0); + return Op; + }; + + // The source value where all the parts are extracted. + SDValue Src; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = PeekThroughBitcast(N->getOperand(i)); + // Ignore undef inputs. + if (In.isUndef()) continue; + + if (In.getOpcode() != ISD::TRUNCATE) + return SDValue(); + + In = PeekThroughBitcast(In.getOperand(0)); + + if (In.getOpcode() != ISD::SRL) { + // For now only build_vec without shuffling, handle shifts here in the + // future. 
+ if (i != 0) + return SDValue(); + + Src = In; + } else { + // In is SRL + SDValue part = PeekThroughBitcast(In.getOperand(0)); + + if (!Src) { + Src = part; + } else if (Src != part) { + // Vector parts do not stem from the same variable + return SDValue(); + } + + SDValue ShiftAmtVal = In.getOperand(1); + if (!isa<ConstantSDNode>(ShiftAmtVal)) + return SDValue(); + + uint64_t ShiftAmt = In.getNode()->getConstantOperandVal(1); + + // The extracted value is not extracted at the right position + if (ShiftAmt != i * ScalarTypeBitsize) + return SDValue(); + } + } + + // Only cast if the size is the same + if (Src.getValueType().getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + return DAG.getBitcast(VT, Src); +} + SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, ArrayRef<int> VectorMask, SDValue VecIn1, SDValue VecIn2, unsigned LeftIdx, bool DidSplitVec) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, DL); EVT VT = N->getValueType(0); EVT InVT1 = VecIn1.getValueType(); @@ -17492,7 +18222,7 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, // If we only have one input vector, and it's twice the size of the // output, split it in two. VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, - DAG.getConstant(NumElems, DL, IdxTy)); + DAG.getVectorIdxConstant(NumElems, DL)); VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx); // Since we now have shorter input vectors, adjust the offset of the // second vector's start. @@ -17699,6 +18429,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { return SDValue(); SDValue ExtractedFromVec = Op.getOperand(0); + if (ExtractedFromVec.getValueType().isScalableVector()) + return SDValue(); + const APInt &ExtractIdx = Op.getConstantOperandAPInt(1); if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements())) return SDValue(); @@ -17733,7 +18466,6 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { unsigned NearestPow2 = 0; SDValue Vec = VecIn.back(); EVT InVT = Vec.getValueType(); - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); SmallVector<unsigned, 8> IndexVec(NumElems, 0); for (unsigned i = 0; i < NumElems; i++) { @@ -17752,9 +18484,9 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) { InVT.getVectorElementType(), SplitSize); if (TLI.isTypeLegal(SplitVT)) { SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, - DAG.getConstant(SplitSize, DL, IdxTy)); + DAG.getVectorIdxConstant(SplitSize, DL)); SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec, - DAG.getConstant(0, DL, IdxTy)); + DAG.getVectorIdxConstant(0, DL)); VecIn.pop_back(); VecIn.push_back(VecIn1); VecIn.push_back(VecIn2); @@ -17986,6 +18718,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; + if (SDValue V = reduceBuildVecTruncToBitCast(N)) + return V; + if (SDValue V = reduceBuildVecToShuffle(N)) return V; @@ -18080,6 +18815,7 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { // What vector are we extracting the subvector from and at what index? SDValue ExtVec = Op.getOperand(0); + int ExtIdx = Op.getConstantOperandVal(1); // We want the EVT of the original extraction to correctly scale the // extraction index. 
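// Illustrative sketch, not part of the patch: reduceBuildVecTruncToBitCast above
// relies on the fact that, on a little-endian target, building a vector from
// successively shifted-and-truncated pieces of one wide value reproduces that
// value's bytes exactly, so a plain bitcast suffices. A standalone C++ analogue
// of the equivalence (assumes a little-endian host, mirroring the
// isLittleEndian() guard in the combine):
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t Wide = 0x1122334455667788ULL;
  // build_vector (trunc Wide), (trunc (srl Wide, 16)), (trunc (srl Wide, 32)),
  //              (trunc (srl Wide, 48))
  uint16_t Elts[4];
  for (unsigned i = 0; i != 4; ++i)
    Elts[i] = static_cast<uint16_t>(Wide >> (i * 16));
  // bitcast i64 Wide to v4i16: a pure reinterpretation of the same bytes.
  uint16_t Cast[4];
  std::memcpy(Cast, &Wide, sizeof(Cast));
  // Identical on little-endian; on big-endian the element order would differ,
  // which is why the combine bails out there.
  assert(std::memcmp(Elts, Cast, sizeof(Elts)) == 0);
  return 0;
}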
@@ -18092,10 +18828,6 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { continue; } - if (!isa<ConstantSDNode>(Op.getOperand(1))) - return SDValue(); - int ExtIdx = Op.getConstantOperandVal(1); - // Ensure that we are extracting a subvector from a vector the same // size as the result. if (ExtVT.getSizeInBits() != VT.getSizeInBits()) @@ -18129,6 +18861,69 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { DAG.getBitcast(VT, SV1), Mask, DAG); } +static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { + unsigned CastOpcode = N->getOperand(0).getOpcode(); + switch (CastOpcode) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + // TODO: Allow more opcodes? + // case ISD::BITCAST: + // case ISD::TRUNCATE: + // case ISD::ZERO_EXTEND: + // case ISD::SIGN_EXTEND: + // case ISD::FP_EXTEND: + break; + default: + return SDValue(); + } + + EVT SrcVT = N->getOperand(0).getOperand(0).getValueType(); + if (!SrcVT.isVector()) + return SDValue(); + + // All operands of the concat must be the same kind of cast from the same + // source type. + SmallVector<SDValue, 4> SrcOps; + for (SDValue Op : N->ops()) { + if (Op.getOpcode() != CastOpcode || !Op.hasOneUse() || + Op.getOperand(0).getValueType() != SrcVT) + return SDValue(); + SrcOps.push_back(Op.getOperand(0)); + } + + // The wider cast must be supported by the target. This is unusual because + // the operation support type parameter depends on the opcode. In addition, + // check the other type in the cast to make sure this is really legal. + EVT VT = N->getValueType(0); + EVT SrcEltVT = SrcVT.getVectorElementType(); + unsigned NumElts = SrcVT.getVectorElementCount().Min * N->getNumOperands(); + EVT ConcatSrcVT = EVT::getVectorVT(*DAG.getContext(), SrcEltVT, NumElts); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + switch (CastOpcode) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + if (!TLI.isOperationLegalOrCustom(CastOpcode, ConcatSrcVT) || + !TLI.isTypeLegal(VT)) + return SDValue(); + break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + if (!TLI.isOperationLegalOrCustom(CastOpcode, VT) || + !TLI.isTypeLegal(ConcatSrcVT)) + return SDValue(); + break; + default: + llvm_unreachable("Unexpected cast opcode"); + } + + // concat (cast X), (cast Y)... -> cast (concat X, Y...) + SDLoc DL(N); + SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatSrcVT, SrcOps); + return DAG.getNode(CastOpcode, DL, VT, NewConcat); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) @@ -18256,6 +19051,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) return V; + if (SDValue V = combineConcatVectorOfCasts(N, DAG)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that @@ -18287,14 +19085,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } - auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); - // The extract index must be constant. - if (!CS) - return SDValue(); - // Check that we are reading from the identity index. 
unsigned IdentityIndex = i * PartNumElem; - if (CS->getAPIntValue() != IdentityIndex) + if (Op.getConstantOperandAPInt(1) != IdentityIndex) return SDValue(); } @@ -18377,6 +19170,15 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); + // Exclude the fake form of fneg (fsub -0.0, x) because that is likely to be + // reduced to the unary fneg when it is visited, and we probably want to deal + // with fneg in a target-specific way. + if (BOpcode == ISD::FSUB) { + auto *C = isConstOrConstSplatFP(BinOp.getOperand(0), /*AllowUndefs*/ true); + if (C && C->getValueAPF().isNegZero()) + return SDValue(); + } + // The binop must be a vector type, so we can extract some fraction of it. EVT WideBVT = BinOp.getValueType(); if (!WideBVT.isVector()) @@ -18412,12 +19214,11 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // bitcasted. unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements(); unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements(); - EVT ExtBOIdxVT = Extract->getOperand(1).getValueType(); if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) && BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) { // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N) SDLoc DL(Extract); - SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); + SDValue NewExtIndex = DAG.getVectorIdxConstant(ExtBOIdx, DL); SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(0), NewExtIndex); SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, @@ -18457,7 +19258,7 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC) // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN SDLoc DL(Extract); - SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT); + SDValue IndexC = DAG.getVectorIdxConstant(ExtBOIdx, DL); SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL) : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT, BinOp.getOperand(0), IndexC); @@ -18489,6 +19290,26 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // Allow targets to opt-out. EVT VT = Extract->getValueType(0); + + // We can only create byte sized loads. + if (!VT.isByteSized()) + return SDValue(); + + unsigned Index = ExtIdx->getZExtValue(); + unsigned NumElts = VT.getVectorNumElements(); + + // If the index is a multiple of the extract element count, we can offset the + // address by the store size multiplied by the subvector index. Otherwise if + // the scalar type is byte sized, we can just use the index multiplied by + // the element size in bytes as the offset. + unsigned Offset; + if (Index % NumElts == 0) + Offset = (Index / NumElts) * VT.getStoreSize(); + else if (VT.getScalarType().isByteSized()) + Offset = Index * VT.getScalarType().getStoreSize(); + else + return SDValue(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT)) return SDValue(); @@ -18496,8 +19317,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { // The narrow load will be offset from the base address of the old load if // we are extracting from something besides index 0 (little-endian). 
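  // Illustrative note, not part of the patch: with VT = v2f32 (8-byte store
  // size, 4-byte elements), extracting at index 2 takes the first branch of the
  // Offset computation above (2 % 2 == 0), giving Offset = (2 / 2) * 8 = 8
  // bytes; extracting at index 3 uses the byte-sized-scalar fallback, giving
  // Offset = 3 * 4 = 12 bytes.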
SDLoc DL(Extract); - SDValue BaseAddr = Ld->getOperand(1); - unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize(); + SDValue BaseAddr = Ld->getBasePtr(); // TODO: Use "BaseIndexOffset" to make this more effective. SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL); @@ -18512,6 +19332,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); + uint64_t ExtIdx = N->getConstantOperandVal(1); // Extract from UNDEF is UNDEF. if (V.isUndef()) @@ -18523,9 +19344,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // Combine an extract of an extract into a single extract_subvector. // ext (ext X, C), 0 --> ext X, C - SDValue Index = N->getOperand(1); - if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && - V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) { + if (ExtIdx == 0 && V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse()) { if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(), V.getConstantOperandVal(1)) && TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) { @@ -18536,21 +19355,20 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // Try to move vector bitcast after extract_subv by scaling extraction index: // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index') - if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST && + if (V.getOpcode() == ISD::BITCAST && V.getOperand(0).getValueType().isVector()) { SDValue SrcOp = V.getOperand(0); EVT SrcVT = SrcOp.getValueType(); - unsigned SrcNumElts = SrcVT.getVectorNumElements(); - unsigned DestNumElts = V.getValueType().getVectorNumElements(); + unsigned SrcNumElts = SrcVT.getVectorMinNumElements(); + unsigned DestNumElts = V.getValueType().getVectorMinNumElements(); if ((SrcNumElts % DestNumElts) == 0) { unsigned SrcDestRatio = SrcNumElts / DestNumElts; - unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio; + ElementCount NewExtEC = NVT.getVectorElementCount() * SrcDestRatio; EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), - NewExtNumElts); + NewExtEC); if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { - unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio; SDLoc DL(N); - SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL); + SDValue NewIndex = DAG.getVectorIdxConstant(ExtIdx * SrcDestRatio, DL); SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, V.getOperand(0), NewIndex); return DAG.getBitcast(NVT, NewExtract); @@ -18558,34 +19376,43 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { } if ((DestNumElts % SrcNumElts) == 0) { unsigned DestSrcRatio = DestNumElts / SrcNumElts; - if ((NVT.getVectorNumElements() % DestSrcRatio) == 0) { - unsigned NewExtNumElts = NVT.getVectorNumElements() / DestSrcRatio; - EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), - SrcVT.getScalarType(), NewExtNumElts); - if ((N->getConstantOperandVal(1) % DestSrcRatio) == 0 && - TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { - unsigned IndexValScaled = N->getConstantOperandVal(1) / DestSrcRatio; + if ((NVT.getVectorMinNumElements() % DestSrcRatio) == 0) { + ElementCount NewExtEC = NVT.getVectorElementCount() / DestSrcRatio; + EVT ScalarVT = SrcVT.getScalarType(); + if ((ExtIdx % DestSrcRatio) == 0) { SDLoc DL(N); - SDValue NewIndex = 
DAG.getIntPtrConstant(IndexValScaled, DL); - SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, - V.getOperand(0), NewIndex); - return DAG.getBitcast(NVT, NewExtract); + unsigned IndexValScaled = ExtIdx / DestSrcRatio; + EVT NewExtVT = + EVT::getVectorVT(*DAG.getContext(), ScalarVT, NewExtEC); + if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) { + SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL); + SDValue NewExtract = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT, + V.getOperand(0), NewIndex); + return DAG.getBitcast(NVT, NewExtract); + } + if (NewExtEC == 1 && + TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, ScalarVT)) { + SDValue NewIndex = DAG.getVectorIdxConstant(IndexValScaled, DL); + SDValue NewExtract = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, + V.getOperand(0), NewIndex); + return DAG.getBitcast(NVT, NewExtract); + } } } } } - if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index)) { + if (V.getOpcode() == ISD::CONCAT_VECTORS) { + unsigned ExtNumElts = NVT.getVectorMinNumElements(); EVT ConcatSrcVT = V.getOperand(0).getValueType(); assert(ConcatSrcVT.getVectorElementType() == NVT.getVectorElementType() && "Concat and extract subvector do not change element type"); - - unsigned ExtIdx = N->getConstantOperandVal(1); - unsigned ExtNumElts = NVT.getVectorNumElements(); - assert(ExtIdx % ExtNumElts == 0 && + assert((ExtIdx % ExtNumElts) == 0 && "Extract index is not a multiple of the input vector length."); - unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorNumElements(); + unsigned ConcatSrcNumElts = ConcatSrcVT.getVectorMinNumElements(); unsigned ConcatOpIdx = ExtIdx / ConcatSrcNumElts; // If the concatenated source types match this extract, it's a direct @@ -18599,15 +19426,14 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // concat operand. Example: // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> // v2i8 extract_subvec v8i8 Y, 6 - if (ConcatSrcNumElts % ExtNumElts == 0) { + if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { SDLoc DL(N); unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && "Trying to extract from >1 concat operand?"); assert(NewExtIdx % ExtNumElts == 0 && "Extract index is not a multiple of the input vector length."); - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue NewIndexC = DAG.getConstant(NewExtIdx, DL, IdxTy); + SDValue NewIndexC = DAG.getVectorIdxConstant(NewExtIdx, DL); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NVT, V.getOperand(ConcatOpIdx), NewIndexC); } @@ -18617,37 +19443,33 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // If the input is a build vector. Try to make a smaller build vector. if (V.getOpcode() == ISD::BUILD_VECTOR) { - if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) { - EVT InVT = V.getValueType(); - unsigned ExtractSize = NVT.getSizeInBits(); - unsigned EltSize = InVT.getScalarSizeInBits(); - // Only do this if we won't split any elements. - if (ExtractSize % EltSize == 0) { - unsigned NumElems = ExtractSize / EltSize; - EVT EltVT = InVT.getVectorElementType(); - EVT ExtractVT = NumElems == 1 ? 
EltVT - : EVT::getVectorVT(*DAG.getContext(), - EltVT, NumElems); - if ((Level < AfterLegalizeDAG || - (NumElems == 1 || - TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && - (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { - unsigned IdxVal = IdxC->getZExtValue(); - IdxVal *= NVT.getScalarSizeInBits(); - IdxVal /= EltSize; - - if (NumElems == 1) { - SDValue Src = V->getOperand(IdxVal); - if (EltVT != Src.getValueType()) - Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); - return DAG.getBitcast(NVT, Src); - } - - // Extract the pieces from the original build_vector. - SDValue BuildVec = DAG.getBuildVector( - ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems)); - return DAG.getBitcast(NVT, BuildVec); + EVT InVT = V.getValueType(); + unsigned ExtractSize = NVT.getSizeInBits(); + unsigned EltSize = InVT.getScalarSizeInBits(); + // Only do this if we won't split any elements. + if (ExtractSize % EltSize == 0) { + unsigned NumElems = ExtractSize / EltSize; + EVT EltVT = InVT.getVectorElementType(); + EVT ExtractVT = + NumElems == 1 ? EltVT + : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems); + if ((Level < AfterLegalizeDAG || + (NumElems == 1 || + TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) && + (!LegalTypes || TLI.isTypeLegal(ExtractVT))) { + unsigned IdxVal = (ExtIdx * NVT.getScalarSizeInBits()) / EltSize; + + if (NumElems == 1) { + SDValue Src = V->getOperand(IdxVal); + if (EltVT != Src.getValueType()) + Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src); + return DAG.getBitcast(NVT, Src); } + + // Extract the pieces from the original build_vector. + SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N), + V->ops().slice(IdxVal, NumElems)); + return DAG.getBitcast(NVT, BuildVec); } } } @@ -18659,23 +19481,19 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { if (!NVT.bitsEq(SmallVT)) return SDValue(); - // Only handle cases where both indexes are constants. 
- auto *ExtIdx = dyn_cast<ConstantSDNode>(Index); - auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2)); - if (InsIdx && ExtIdx) { - // Combine: - // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) - // Into: - // indices are equal or bit offsets are equal => V1 - // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() == - ExtIdx->getZExtValue() * NVT.getScalarSizeInBits()) - return DAG.getBitcast(NVT, V.getOperand(1)); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, - DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), - Index); - } + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal or bit offsets are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + uint64_t InsIdx = V.getConstantOperandVal(2); + if (InsIdx * SmallVT.getScalarSizeInBits() == + ExtIdx * NVT.getScalarSizeInBits()) + return DAG.getBitcast(NVT, V.getOperand(1)); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, + DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), + N->getOperand(1)); } if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG)) @@ -19064,6 +19882,57 @@ static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, NewMask); } +/// Combine shuffle of shuffle of the form: +/// shuf (shuf X, undef, InnerMask), undef, OuterMask --> splat X +static SDValue formSplatFromShuffles(ShuffleVectorSDNode *OuterShuf, + SelectionDAG &DAG) { + if (!OuterShuf->getOperand(1).isUndef()) + return SDValue(); + auto *InnerShuf = dyn_cast<ShuffleVectorSDNode>(OuterShuf->getOperand(0)); + if (!InnerShuf || !InnerShuf->getOperand(1).isUndef()) + return SDValue(); + + ArrayRef<int> OuterMask = OuterShuf->getMask(); + ArrayRef<int> InnerMask = InnerShuf->getMask(); + unsigned NumElts = OuterMask.size(); + assert(NumElts == InnerMask.size() && "Mask length mismatch"); + SmallVector<int, 32> CombinedMask(NumElts, -1); + int SplatIndex = -1; + for (unsigned i = 0; i != NumElts; ++i) { + // Undef lanes remain undef. + int OuterMaskElt = OuterMask[i]; + if (OuterMaskElt == -1) + continue; + + // Peek through the shuffle masks to get the underlying source element. + int InnerMaskElt = InnerMask[OuterMaskElt]; + if (InnerMaskElt == -1) + continue; + + // Initialize the splatted element. + if (SplatIndex == -1) + SplatIndex = InnerMaskElt; + + // Non-matching index - this is not a splat. + if (SplatIndex != InnerMaskElt) + return SDValue(); + + CombinedMask[i] = InnerMaskElt; + } + assert((all_of(CombinedMask, [](int M) { return M == -1; }) || + getSplatIndex(CombinedMask) != -1) && + "Expected a splat mask"); + + // TODO: The transform may be a win even if the mask is not legal. + EVT VT = OuterShuf->getValueType(0); + assert(VT == InnerShuf->getValueType(0) && "Expected matching shuffle types"); + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(CombinedMask, VT)) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(OuterShuf), InnerShuf->getOperand(0), + InnerShuf->getOperand(1), CombinedMask); +} + /// If the shuffle mask is taking exactly one element from the first vector /// operand and passing through all other elements from the second vector /// operand, return the index of the mask element that is choosing an element @@ -19136,8 +20005,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, // element used. Therefore, our new insert element occurs at the shuffle's // mask index value, not the insert's index value. 
// shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C' - SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf), - Op0.getOperand(2).getValueType()); + SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf)); return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(), Op1, Op0.getOperand(1), NewInsIndex); } @@ -19223,6 +20091,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue V = combineShuffleOfSplatVal(SVN, DAG)) return V; + if (SDValue V = formSplatFromShuffles(SVN, DAG)) + return V; + // If it is a splat, check if the argument vector is another splat or a // build_vector. if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { @@ -19234,7 +20105,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDValue L = N0.getOperand(0), R = N0.getOperand(1); SDLoc DL(N); EVT EltVT = VT.getScalarType(); - SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL); + SDValue Index = DAG.getVectorIdxConstant(SplatIndex, DL); SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index); SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index); SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR, @@ -19354,16 +20225,6 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && N1.isUndef() && Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { - auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { - if (Scale == 1) - return SmallVector<int, 8>(Mask.begin(), Mask.end()); - - SmallVector<int, 8> NewMask; - for (int M : Mask) - for (int s = 0; s != Scale; ++s) - NewMask.push_back(M < 0 ? -1 : Scale * M + s); - return NewMask; - }; SDValue BC0 = peekThroughOneUseBitcasts(N0); if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { @@ -19383,10 +20244,10 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // Scale the shuffle masks to the smaller scalar type. ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); - SmallVector<int, 8> InnerMask = - ScaleShuffleMask(InnerSVN->getMask(), InnerScale); - SmallVector<int, 8> OuterMask = - ScaleShuffleMask(SVN->getMask(), OuterScale); + SmallVector<int, 8> InnerMask; + SmallVector<int, 8> OuterMask; + narrowShuffleMaskElts(InnerScale, InnerSVN->getMask(), InnerMask); + narrowShuffleMaskElts(OuterScale, SVN->getMask(), OuterMask); // Merge the shuffle masks. SmallVector<int, 8> NewMask; @@ -19547,7 +20408,9 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern // with a VECTOR_SHUFFLE and possible truncate. - if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + VT.isFixedLengthVector() && + InVal->getOperand(0).getValueType().isFixedLengthVector()) { SDValue InVec = InVal->getOperand(0); SDValue EltNo = InVal->getOperand(1); auto InVecT = InVec.getValueType(); @@ -19576,11 +20439,10 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { return LegalShuffle; // If not we must truncate the vector. 
if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); - EVT SubVT = - EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), - VT.getVectorNumElements()); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N)); + EVT SubVT = EVT::getVectorVT(*DAG.getContext(), + InVecT.getVectorElementType(), + VT.getVectorNumElements()); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle, ZeroIdx); } @@ -19597,6 +20459,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); + uint64_t InsIdx = N->getConstantOperandVal(2); // If inserting an UNDEF, just return the original vector. if (N1.isUndef()) @@ -19657,11 +20520,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0, N1.getOperand(1), N2); - if (!isa<ConstantSDNode>(N2)) - return SDValue(); - - uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue(); - // Push subvector bitcasts to the output, adjusting the index as we go. // insert_subvector(bitcast(v), bitcast(s), c1) // -> bitcast(insert_subvector(v, s, c2)) @@ -19676,19 +20534,18 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { EVT NewVT; SDLoc DL(N); SDValue NewIdx; - MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); LLVMContext &Ctx = *DAG.getContext(); unsigned NumElts = VT.getVectorNumElements(); unsigned EltSizeInBits = VT.getScalarSizeInBits(); if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) { unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits(); NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale); - NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT); + NewIdx = DAG.getVectorIdxConstant(InsIdx * Scale, DL); } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) { unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits; if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) { NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale); - NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT); + NewIdx = DAG.getVectorIdxConstant(InsIdx / Scale, DL); } } if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) { @@ -19704,8 +20561,7 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { // (insert_subvector (insert_subvector A, Idx0), Idx1) // -> (insert_subvector (insert_subvector A, Idx1), Idx0) if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() && - N1.getValueType() == N0.getOperand(1).getValueType() && - isa<ConstantSDNode>(N0.getOperand(2))) { + N1.getValueType() == N0.getOperand(1).getValueType()) { unsigned OtherIdx = N0.getConstantOperandVal(2); if (InsIdx < OtherIdx) { // Swap nodes. @@ -19722,10 +20578,8 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() && N0.getOperand(0).getValueType() == N1.getValueType()) { unsigned Factor = N1.getValueType().getVectorNumElements(); - SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end()); - Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1; - + Ops[InsIdx / Factor] = N1; return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); } @@ -19769,9 +20623,9 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { // VECREDUCE over 1-element vector is just an extract. 
if (VT.getVectorNumElements() == 1) { SDLoc dl(N); - SDValue Res = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Res = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0, + DAG.getVectorIdxConstant(0, dl)); if (Res.getValueType() != N->getValueType(0)) Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res); return Res; @@ -19904,10 +20758,9 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { return SDValue(); SDLoc DL(N); - SDValue IndexC = - DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); - SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC); - SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC); + SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); + SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC); + SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC); SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags()); // If all lanes but 1 are undefined, no need to splat the scalar result. @@ -19937,6 +20790,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { SDValue Ops[] = {LHS, RHS}; EVT VT = N->getValueType(0); unsigned Opcode = N->getOpcode(); + SDNodeFlags Flags = N->getFlags(); // See if we can constant fold the vector operation. if (SDValue Fold = DAG.FoldConstantVectorArithmetic( @@ -19960,10 +20814,37 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { SDLoc DL(N); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), - RHS.getOperand(0), N->getFlags()); + RHS.getOperand(0), Flags); SDValue UndefV = LHS.getOperand(1); return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask()); } + + // Try to sink a splat shuffle after a binop with a uniform constant. + // This is limited to cases where neither the shuffle nor the constant have + // undefined elements because that could be poison-unsafe or inhibit + // demanded elements analysis. It is further limited to not change a splat + // of an inserted scalar because that may be optimized better by + // load-folding or other target-specific behaviors. + if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) && + Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && + Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { + // binop (splat X), (splat C) --> splat (binop X, C) + SDLoc DL(N); + SDValue X = Shuf0->getOperand(0); + SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags); + return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), + Shuf0->getMask()); + } + if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) && + Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && + Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { + // binop (splat C), (splat X) --> splat (binop C, X) + SDLoc DL(N); + SDValue X = Shuf1->getOperand(0); + SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS, X, Flags); + return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), + Shuf1->getMask()); + } } // The following pattern is likely to emerge with vector reduction ops. Moving @@ -20361,8 +21242,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( // Create a ConstantArray of the two constants. 
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), - TD.getPrefTypeAlignment(FPTy)); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + TD.getPrefTypeAlign(FPTy)); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); // Get offsets to the 0 and 1 elements of the array, so we can select between // them. @@ -20797,7 +21678,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, EVT CCVT = getSetCCResultType(VT); ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT; DenormalMode DenormMode = DAG.getDenormalMode(VT); - if (DenormMode == DenormalMode::IEEE) { + if (DenormMode.Input == DenormalMode::IEEE) { + // This is specifically a check for the handling of denormal inputs, + // not the result. + // fabs(X) < SmallestNormal ? 0.0 : Est const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); @@ -20849,9 +21733,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; + uint64_t Size = + MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize()); return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), Offset /*base offset*/, - Optional<int64_t>(LSN->getMemoryVT().getStoreSize()), + Optional<int64_t>(Size), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) @@ -20911,21 +21797,24 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { // If we know required SrcValue1 and SrcValue2 have relatively large // alignment compared to the size and offset of the access, we may be able // to prove they do not alias. This check is conservative for now to catch - // cases created by splitting vector types. + // cases created by splitting vector types, it only works when the offsets are + // multiples of the size of the data. int64_t SrcValOffset0 = MUC0.MMO->getOffset(); int64_t SrcValOffset1 = MUC1.MMO->getOffset(); - unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment(); - unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment(); + Align OrigAlignment0 = MUC0.MMO->getBaseAlign(); + Align OrigAlignment1 = MUC1.MMO->getBaseAlign(); + auto &Size0 = MUC0.NumBytes; + auto &Size1 = MUC1.NumBytes; if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 && - MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() && - *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) { - int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0; - int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1; + Size0.hasValue() && Size1.hasValue() && *Size0 == *Size1 && + OrigAlignment0 > *Size0 && SrcValOffset0 % *Size0 == 0 && + SrcValOffset1 % *Size1 == 0) { + int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value(); + int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value(); // There is no overlap between these relatively aligned accesses of // similar size. Return no alias. 
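    // Illustrative note, not part of the patch: with *Size0 == *Size1 == 8, a
    // shared base alignment of 16, and offsets 0 and 8 (both multiples of 8),
    // the residues are OffAlign0 == 0 and OffAlign1 == 8; since 0 + 8 <= 8, the
    // two accesses occupy disjoint halves of every 16-byte-aligned window and
    // therefore cannot overlap.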
- if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 || - (OffAlign1 + *MUC1.NumBytes) <= OffAlign0) + if ((OffAlign0 + *Size0) <= OffAlign1 || (OffAlign1 + *Size1) <= OffAlign0) return false; } @@ -20938,11 +21827,12 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { UseAA = false; #endif - if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) { + if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue() && + Size0.hasValue() && Size1.hasValue()) { // Use alias analysis information. int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1); - int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset; - int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset; + int64_t Overlap0 = *Size0 + SrcValOffset0 - MinOffset; + int64_t Overlap1 = *Size1 + SrcValOffset1 - MinOffset; AliasResult AAResult = AA->alias( MemoryLocation(MUC0.MMO->getValue(), Overlap0, UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()), @@ -21099,10 +21989,10 @@ bool operator!=(const UnitT &, const UnitT &) { return false; } // redundant, as this function gets called when visiting every store // node, so why not let the work be done on each store as it's visited? // -// I believe this is mainly important because MergeConsecutiveStores +// I believe this is mainly important because mergeConsecutiveStores // is unable to deal with merging stores of different sizes, so unless // we improve the chains of all the potential candidates up-front -// before running MergeConsecutiveStores, it might only see some of +// before running mergeConsecutiveStores, it might only see some of // the nodes that will eventually be candidates, and then not be able // to go from a partially-merged state to the desired final // fully-merged state. @@ -21131,6 +22021,12 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { if (BasePtr.getBase().isUndef()) return false; + // BaseIndexOffset assumes that offsets are fixed-size, which + // is not valid for scalable vectors where the offsets are + // scaled by `vscale`, so bail out early. + if (St->getMemoryVT().isScalableVector()) + return false; + // Add ST's interval. Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 2bec8613e79c4..fc6c3a145f132 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -68,7 +68,6 @@ #include "llvm/IR/Argument.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -156,7 +155,7 @@ bool FastISel::lowerArguments() { for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), E = FuncInfo.Fn->arg_end(); I != E; ++I) { - DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I); + DenseMap<const Value *, Register>::iterator VI = LocalValueMap.find(&*I); assert(VI != LocalValueMap.end() && "Missed an argument?"); FuncInfo.ValueMap[&*I] = VI->second; } @@ -165,8 +164,8 @@ bool FastISel::lowerArguments() { /// Return the defined register if this instruction defines exactly one /// virtual register and uses no other virtual registers. Otherwise return 0. 
-static unsigned findSinkableLocalRegDef(MachineInstr &MI) { - unsigned RegDef = 0; +static Register findSinkableLocalRegDef(MachineInstr &MI) { + Register RegDef; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; @@ -174,9 +173,9 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) { if (RegDef) return 0; RegDef = MO.getReg(); - } else if (Register::isVirtualRegister(MO.getReg())) { + } else if (MO.getReg().isVirtual()) { // This is another use of a vreg. Don't try to sink it. - return 0; + return Register(); } } return RegDef; @@ -202,7 +201,7 @@ void FastISel::flushLocalValueMap() { bool Store = true; if (!LocalMI.isSafeToMove(nullptr, Store)) continue; - unsigned DefReg = findSinkableLocalRegDef(LocalMI); + Register DefReg = findSinkableLocalRegDef(LocalMI); if (DefReg == 0) continue; @@ -217,7 +216,7 @@ void FastISel::flushLocalValueMap() { LastFlushPoint = FuncInfo.InsertPt; } -static bool isRegUsedByPhiNodes(unsigned DefReg, +static bool isRegUsedByPhiNodes(Register DefReg, FunctionLoweringInfo &FuncInfo) { for (auto &P : FuncInfo.PHINodesToUpdate) if (P.second == DefReg) @@ -225,6 +224,21 @@ static bool isRegUsedByPhiNodes(unsigned DefReg, return false; } +static bool isTerminatingEHLabel(MachineBasicBlock *MBB, MachineInstr &MI) { + // Ignore non-EH labels. + if (!MI.isEHLabel()) + return false; + + // Any EH label outside a landing pad must be for an invoke. Consider it a + // terminator. + if (!MBB->isEHPad()) + return true; + + // If this is a landingpad, the first non-phi instruction will be an EH_LABEL. + // Don't consider that label to be a terminator. + return MI.getIterator() != MBB->getFirstNonPHI(); +} + /// Build a map of instruction orders. Return the first terminator and its /// order. Consider EH_LABEL instructions to be terminators as well, since local /// values for phis after invokes must be materialized before the call. @@ -233,7 +247,7 @@ void FastISel::InstOrderMap::initialize( unsigned Order = 0; for (MachineInstr &I : *MBB) { if (!FirstTerminator && - (I.isTerminator() || (I.isEHLabel() && &I != &MBB->front()))) { + (I.isTerminator() || isTerminatingEHLabel(MBB, I))) { FirstTerminator = &I; FirstTerminatorOrder = Order; } @@ -246,7 +260,7 @@ void FastISel::InstOrderMap::initialize( } void FastISel::sinkLocalValueMaterialization(MachineInstr &LocalMI, - unsigned DefReg, + Register DefReg, InstOrderMap &OrderMap) { // If this register is used by a register fixup, MRI will not contain all // the uses until after register fixups, so don't attempt to sink or DCE @@ -341,7 +355,7 @@ bool FastISel::hasTrivialKill(const Value *V) { // Even the value might have only one use in the LLVM IR, it is possible that // FastISel might fold the use into another instruction and now there is more // than one use at the Machine Instruction level. - unsigned Reg = lookUpRegForValue(V); + Register Reg = lookUpRegForValue(V); if (Reg && !MRI.use_empty(Reg)) return false; @@ -359,11 +373,11 @@ bool FastISel::hasTrivialKill(const Value *V) { cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); } -unsigned FastISel::getRegForValue(const Value *V) { +Register FastISel::getRegForValue(const Value *V) { EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true); // Don't handle non-simple values in FastISel. if (!RealVT.isSimple()) - return 0; + return Register(); // Ignore illegal types. 
We must do this before looking up the value // in ValueMap because Arguments are given virtual registers regardless @@ -374,11 +388,11 @@ unsigned FastISel::getRegForValue(const Value *V) { if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); else - return 0; + return Register(); } // Look up the value to see if we already have a register for it. - unsigned Reg = lookUpRegForValue(V); + Register Reg = lookUpRegForValue(V); if (Reg) return Reg; @@ -400,8 +414,8 @@ unsigned FastISel::getRegForValue(const Value *V) { return Reg; } -unsigned FastISel::materializeConstant(const Value *V, MVT VT) { - unsigned Reg = 0; +Register FastISel::materializeConstant(const Value *V, MVT VT) { + Register Reg; if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getValue().getActiveBits() <= 64) Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); @@ -428,9 +442,9 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { bool isExact; (void)Flt.convertToInteger(SIntVal, APFloat::rmTowardZero, &isExact); if (isExact) { - unsigned IntegerReg = + Register IntegerReg = getRegForValue(ConstantInt::get(V->getContext(), SIntVal)); - if (IntegerReg != 0) + if (IntegerReg) Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, /*Kill=*/false); } @@ -452,8 +466,8 @@ unsigned FastISel::materializeConstant(const Value *V, MVT VT) { /// Helper for getRegForValue. This function is called when the value isn't /// already available in a register and must be materialized with new /// instructions. -unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { - unsigned Reg = 0; +Register FastISel::materializeRegForValue(const Value *V, MVT VT) { + Register Reg; // Give the target-specific code a try first. if (isa<Constant>(V)) Reg = fastMaterializeConstant(cast<Constant>(V)); @@ -472,25 +486,25 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { return Reg; } -unsigned FastISel::lookUpRegForValue(const Value *V) { +Register FastISel::lookUpRegForValue(const Value *V) { // Look up the value to see if we already have a register for it. We // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(V); if (I != FuncInfo.ValueMap.end()) return I->second; return LocalValueMap[V]; } -void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { +void FastISel::updateValueMap(const Value *I, Register Reg, unsigned NumRegs) { if (!isa<Instruction>(I)) { LocalValueMap[I] = Reg; return; } - unsigned &AssignedReg = FuncInfo.ValueMap[I]; - if (AssignedReg == 0) + Register &AssignedReg = FuncInfo.ValueMap[I]; + if (!AssignedReg) // Use the new register. AssignedReg = Reg; else if (Reg != AssignedReg) { @@ -504,11 +518,11 @@ void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { } } -std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { - unsigned IdxN = getRegForValue(Idx); - if (IdxN == 0) +std::pair<Register, bool> FastISel::getRegForGEPIndex(const Value *Idx) { + Register IdxN = getRegForValue(Idx); + if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. 
- return std::pair<unsigned, bool>(0, false); + return std::pair<Register, bool>(Register(), false); bool IdxNIsKill = hasTrivialKill(Idx); @@ -524,7 +538,7 @@ std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill); IdxNIsKill = true; } - return std::pair<unsigned, bool>(IdxN, IdxNIsKill); + return std::pair<Register, bool>(IdxN, IdxNIsKill); } void FastISel::recomputeInsertPt() { @@ -605,12 +619,12 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { // we don't have anything that canonicalizes operand order. if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0))) if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { - unsigned Op1 = getRegForValue(I->getOperand(1)); + Register Op1 = getRegForValue(I->getOperand(1)); if (!Op1) return false; bool Op1IsKill = hasTrivialKill(I->getOperand(1)); - unsigned ResultReg = + Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill, CI->getZExtValue(), VT.getSimpleVT()); if (!ResultReg) @@ -621,7 +635,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { return true; } - unsigned Op0 = getRegForValue(I->getOperand(0)); + Register Op0 = getRegForValue(I->getOperand(0)); if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); @@ -644,7 +658,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { ISDOpcode = ISD::AND; } - unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Register ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Imm, VT.getSimpleVT()); if (!ResultReg) return false; @@ -654,13 +668,13 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { return true; } - unsigned Op1 = getRegForValue(I->getOperand(1)); + Register Op1 = getRegForValue(I->getOperand(1)); if (!Op1) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op1IsKill = hasTrivialKill(I->getOperand(1)); // Now we have both operands in registers. Emit the instruction. - unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + Register ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); if (!ResultReg) // Target-specific code wasn't able to find a machine opcode for @@ -673,7 +687,7 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { } bool FastISel::selectGetElementPtr(const User *I) { - unsigned N = getRegForValue(I->getOperand(0)); + Register N = getRegForValue(I->getOperand(0)); if (!N) // Unhandled operand. Halt "fast" selection and bail. return false; bool NIsKill = hasTrivialKill(I->getOperand(0)); @@ -729,8 +743,8 @@ bool FastISel::selectGetElementPtr(const User *I) { // N = N + Idx * ElementSize; uint64_t ElementSize = DL.getTypeAllocSize(Ty); - std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); - unsigned IdxN = Pair.first; + std::pair<Register, bool> Pair = getRegForGEPIndex(Idx); + Register IdxN = Pair.first; bool IdxNIsKill = Pair.second; if (!IdxN) // Unhandled operand. Halt "fast" selection and bail. 
return false; @@ -778,7 +792,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, else return false; } else { - unsigned Reg = getRegForValue(Val); + Register Reg = getRegForValue(Val); if (!Reg) return false; Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); @@ -871,7 +885,6 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, Args.reserve(NumArgs); // Populate the argument list. - ImmutableCallSite CS(CI); for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs; ArgI != ArgE; ++ArgI) { Value *V = CI->getOperand(ArgI); @@ -880,7 +893,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, ArgListEntry Entry; Entry.Val = V; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, ArgI); + Entry.setAttributes(CI, ArgI); Args.push_back(Entry); } @@ -987,7 +1000,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { // place these in any free register. if (IsAnyRegCC) { for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) { - unsigned Reg = getRegForValue(I->getArgOperand(i)); + Register Reg = getRegForValue(I->getArgOperand(i)); if (!Reg) return false; Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false)); @@ -1104,10 +1117,8 @@ bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName, bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol, unsigned NumArgs) { - ImmutableCallSite CS(CI); - - FunctionType *FTy = CS.getFunctionType(); - Type *RetTy = CS.getType(); + FunctionType *FTy = CI->getFunctionType(); + Type *RetTy = CI->getType(); ArgListTy Args; Args.reserve(NumArgs); @@ -1122,13 +1133,13 @@ bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol, ArgListEntry Entry; Entry.Val = V; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, ArgI); + Entry.setAttributes(CI, ArgI); Args.push_back(Entry); } - TLI.markLibCallAttributes(MF, CS.getCallingConv(), Args); + TLI.markLibCallAttributes(MF, CI->getCallingConv(), Args); CallLoweringInfo CLI; - CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs); + CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), *CI, NumArgs); return lowerCallTo(CLI); } @@ -1203,7 +1214,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // the various CC lowering callbacks. Flags.setByVal(); } - if (Arg.IsByVal || Arg.IsInAlloca) { + if (Arg.IsPreallocated) { + Flags.setPreallocated(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // preallocated. This way we can know how many bytes we should've + // allocated and how many bytes a callee cleanup function will pop. If we + // port preallocated to more targets, we'll have to add custom + // preallocated handling in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) { PointerType *Ty = cast<PointerType>(Arg.Ty); Type *ElementTy = Ty->getElementType(); unsigned FrameSize = @@ -1211,17 +1231,17 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { // For ByVal, alignment should come from FE. BE will guess if this info // is not there, but there are cases it cannot get right. 
- unsigned FrameAlign = Arg.Alignment; + MaybeAlign FrameAlign = Arg.Alignment; if (!FrameAlign) - FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); + FrameAlign = Align(TLI.getByValTypeAlignment(ElementTy, DL)); Flags.setByValSize(FrameSize); - Flags.setByValAlign(Align(FrameAlign)); + Flags.setByValAlign(*FrameAlign); } if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); + Flags.setOrigAlign(DL.getABITypeAlign(Arg.Ty)); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); @@ -1234,29 +1254,26 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { assert(CLI.Call && "No call instruction specified."); CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); - if (CLI.NumResultRegs && CLI.CS) - updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + if (CLI.NumResultRegs && CLI.CB) + updateValueMap(CLI.CB, CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. - if (CLI.CS) - if (MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite")) + if (CLI.CB) + if (MDNode *MD = CLI.CB->getMetadata("heapallocsite")) CLI.Call->setHeapAllocMarker(*MF, MD); return true; } bool FastISel::lowerCall(const CallInst *CI) { - ImmutableCallSite CS(CI); - - FunctionType *FuncTy = CS.getFunctionType(); - Type *RetTy = CS.getType(); + FunctionType *FuncTy = CI->getFunctionType(); + Type *RetTy = CI->getType(); ArgListTy Args; ArgListEntry Entry; - Args.reserve(CS.arg_size()); + Args.reserve(CI->arg_size()); - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { + for (auto i = CI->arg_begin(), e = CI->arg_end(); i != e; ++i) { Value *V = *i; // Skip empty types @@ -1267,14 +1284,14 @@ bool FastISel::lowerCall(const CallInst *CI) { Entry.Ty = V->getType(); // Skip the first return-type Attribute to get to params. - Entry.setAttributes(&CS, i - CS.arg_begin()); + Entry.setAttributes(CI, i - CI->arg_begin()); Args.push_back(Entry); } // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within fastLowerCall. bool IsTailCall = CI->isTailCall(); - if (IsTailCall && !isInTailCallPosition(CS, TM)) + if (IsTailCall && !isInTailCallPosition(*CI, TM)) IsTailCall = false; if (IsTailCall && MF->getFunction() .getFnAttribute("disable-tail-calls") @@ -1282,7 +1299,7 @@ bool FastISel::lowerCall(const CallInst *CI) { IsTailCall = false; CallLoweringInfo CLI; - CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) + CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI) .setTailCall(IsTailCall); return lowerCallTo(CLI); @@ -1292,7 +1309,7 @@ bool FastISel::selectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); // Handle simple inline asms. - if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) { + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledOperand())) { // If the inline asm has side effects, then make sure that no local value // lives across by flushing the local value map. 
if (IA->hasSideEffects()) @@ -1307,12 +1324,19 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + if (Call->isConvergent()) + ExtraInfo |= InlineAsm::Extra_IsConvergent; ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::INLINEASM)) - .addExternalSymbol(IA->getAsmString().c_str()) - .addImm(ExtraInfo); + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::INLINEASM)); + MIB.addExternalSymbol(IA->getAsmString().c_str()); + MIB.addImm(ExtraInfo); + + const MDNode *SrcLoc = Call->getMetadata("srcloc"); + if (SrcLoc) + MIB.addMetadata(SrcLoc); + return true; } @@ -1350,13 +1374,15 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); assert(DI->getVariable() && "Missing variable"); if (!FuncInfo.MF->getMMI().hasDebugInfo()) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI + << " (!hasDebugInfo)\n"); return true; } const Value *Address = DI->getAddress(); if (!Address || isa<UndefValue>(Address)) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI + << " (bad/undef address)\n"); return true; } @@ -1368,7 +1394,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { return true; Optional<MachineOperand> Op; - if (unsigned Reg = lookUpRegForValue(Address)) + if (Register Reg = lookUpRegForValue(Address)) Op = MachineOperand::CreateReg(Reg, false); // If we have a VLA that has a "use" in a metadata node that's then used @@ -1393,15 +1419,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); // A dbg.declare describes the address of a source variable, so lower it // into an indirect DBG_VALUE. - auto *Expr = DI->getExpression(); - Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, - *Op, DI->getVariable(), Expr); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, + *Op, DI->getVariable(), DI->getExpression()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI + << " (no materialized reg for address)\n"); } return true; } @@ -1412,38 +1437,37 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const Value *V = DI->getValue(); assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && "Expected inlined-at fields to agree"); - if (!V) { + if (!V || isa<UndefValue>(V)) { // Currently the optimizer can produce this; insert an undef to - // help debugging. Probably the optimizer should not do this. + // help debugging. 
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addReg(0U) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addReg(0U) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addReg(0U) + .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); - } else if (unsigned Reg = lookUpRegForValue(V)) { + } else if (Register Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. bool IsIndirect = false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, DI->getVariable(), DI->getExpression()); } else { - // We can't yet handle anything else here because it would require - // generating code, thus altering codegen because of debug info. + // We don't know how to handle other cases, so we drop. LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); } return true; @@ -1469,7 +1493,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { - unsigned ResultReg = getRegForValue(II->getArgOperand(0)); + Register ResultReg = getRegForValue(II->getArgOperand(0)); if (!ResultReg) return false; updateValueMap(II, ResultReg); @@ -1507,14 +1531,14 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) { if (!TLI.isTypeLegal(SrcVT)) return false; - unsigned InputReg = getRegForValue(I->getOperand(0)); + Register InputReg = getRegForValue(I->getOperand(0)); if (!InputReg) // Unhandled operand. Halt "fast" selection and bail. return false; bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); - unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + Register ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opcode, InputReg, InputRegIsKill); if (!ResultReg) return false; @@ -1526,7 +1550,7 @@ bool FastISel::selectCast(const User *I, unsigned Opcode) { bool FastISel::selectBitCast(const User *I) { // If the bitcast doesn't change the type, just use the operand value. if (I->getType() == I->getOperand(0)->getType()) { - unsigned Reg = getRegForValue(I->getOperand(0)); + Register Reg = getRegForValue(I->getOperand(0)); if (!Reg) return false; updateValueMap(I, Reg); @@ -1543,13 +1567,13 @@ bool FastISel::selectBitCast(const User *I) { MVT SrcVT = SrcEVT.getSimpleVT(); MVT DstVT = DstEVT.getSimpleVT(); - unsigned Op0 = getRegForValue(I->getOperand(0)); + Register Op0 = getRegForValue(I->getOperand(0)); if (!Op0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // First, try to perform the bitcast by inserting a reg-reg copy. 
- unsigned ResultReg = 0; + Register ResultReg; if (SrcVT == DstVT) { const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); @@ -1572,6 +1596,27 @@ bool FastISel::selectBitCast(const User *I) { return true; } +bool FastISel::selectFreeze(const User *I) { + Register Reg = getRegForValue(I->getOperand(0)); + if (!Reg) + // Unhandled operand. + return false; + + EVT ETy = TLI.getValueType(DL, I->getOperand(0)->getType()); + if (ETy == MVT::Other || !TLI.isTypeLegal(ETy)) + // Unhandled type, bail out. + return false; + + MVT Ty = ETy.getSimpleVT(); + const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty); + Register ResultReg = createResultReg(TyRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg); + + updateValueMap(I, ResultReg); + return true; +} + // Remove local value instructions starting from the instruction after // SavedLastLocalValue to the current function insert point. void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) @@ -1607,9 +1652,9 @@ bool FastISel::selectInstruction(const Instruction *I) { } // FastISel does not handle any operand bundles except OB_funclet. - if (ImmutableCallSite CS = ImmutableCallSite(I)) - for (unsigned i = 0, e = CS.getNumOperandBundles(); i != e; ++i) - if (CS.getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) + if (auto *Call = dyn_cast<CallBase>(I)) + for (unsigned i = 0, e = Call->getNumOperandBundles(); i != e; ++i) + if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) return false; DbgLoc = I->getDebugLoc(); @@ -1710,14 +1755,14 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB, /// Emit an FNeg operation. bool FastISel::selectFNeg(const User *I, const Value *In) { - unsigned OpReg = getRegForValue(In); + Register OpReg = getRegForValue(In); if (!OpReg) return false; bool OpRegIsKill = hasTrivialKill(In); // If the target has ISD::FNEG, use it. EVT VT = TLI.getValueType(DL, I->getType()); - unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, + Register ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, OpReg, OpRegIsKill); if (ResultReg) { updateValueMap(I, ResultReg); @@ -1732,12 +1777,12 @@ bool FastISel::selectFNeg(const User *I, const Value *In) { if (!TLI.isTypeLegal(IntVT)) return false; - unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + Register IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), ISD::BITCAST, OpReg, OpRegIsKill); if (!IntReg) return false; - unsigned IntResultReg = fastEmit_ri_( + Register IntResultReg = fastEmit_ri_( IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true, UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); if (!IntResultReg) @@ -1771,7 +1816,7 @@ bool FastISel::selectExtractValue(const User *U) { // Get the base result register. 
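A minimal host-side sketch of the fallback used by selectFNeg above, purely illustrative and not part of the diff: when ISD::FNEG is unavailable, the value is bitcast to an integer of the same width, the sign bit is flipped with XOR, and the result is bitcast back.

    #include <cstdint>
    #include <cstring>

    // Negate a float without an FNEG instruction: flip only the sign bit.
    static float fnegViaXor(float X) {
      uint32_t Bits;
      std::memcpy(&Bits, &X, sizeof(Bits)); // "bitcast" float -> i32
      Bits ^= UINT32_C(1) << 31;            // XOR with 1 << (SizeInBits - 1)
      std::memcpy(&X, &Bits, sizeof(X));    // "bitcast" i32 -> float
      return X;                             // also flips the sign of -0.0, Inf and NaN
    }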
unsigned ResultReg; - DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0); + DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(Op0); if (I != FuncInfo.ValueMap.end()) ResultReg = I->second; else if (isa<Instruction>(Op0)) @@ -1903,7 +1948,7 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { return selectCast(I, ISD::ZERO_EXTEND); if (DstVT.bitsLT(SrcVT)) return selectCast(I, ISD::TRUNCATE); - unsigned Reg = getRegForValue(I->getOperand(0)); + Register Reg = getRegForValue(I->getOperand(0)); if (!Reg) return false; updateValueMap(I, Reg); @@ -1913,6 +1958,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) { case Instruction::ExtractValue: return selectExtractValue(I); + case Instruction::Freeze: + return selectFreeze(I); + case Instruction::PHI: llvm_unreachable("FastISel shouldn't visit PHI nodes!"); @@ -1975,7 +2023,7 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/, /// instruction with an immediate operand using fastEmit_ri. /// If that fails, it materializes the immediate into a register and try /// fastEmit_rr instead. -unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, +Register FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType) { // If this is a multiply by a power of two, emit this as a shift left. if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { @@ -1994,10 +2042,10 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, return 0; // First check if immediate type is legal. If not, we can't use the ri form. - unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); + Register ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg) return ResultReg; - unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); + Register MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm); bool IsImmKill = true; if (!MaterialReg) { // This is a bit ugly/slow, but failing here means falling out of @@ -2018,19 +2066,19 @@ unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); } -unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { +Register FastISel::createResultReg(const TargetRegisterClass *RC) { return MRI.createVirtualRegister(RC); } -unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, +Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, unsigned OpNum) { - if (Register::isVirtualRegister(Op)) { + if (Op.isVirtual()) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); if (!MRI.constrainRegClass(Op, RegClass)) { // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. 
- unsigned NewOp = createResultReg(RegClass); + Register NewOp = createResultReg(RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), NewOp).addReg(Op); return NewOp; @@ -2039,21 +2087,21 @@ unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, return Op; } -unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass *RC) { - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); return ResultReg; } -unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) @@ -2069,13 +2117,13 @@ unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); @@ -2093,14 +2141,14 @@ unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, unsigned Op2, bool Op2IsKill) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); @@ -2121,12 +2169,12 @@ unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) @@ -2143,13 +2191,13 @@ unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm1, uint64_t Imm2) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) @@ -2168,12 +2216,12 @@ unsigned FastISel::fastEmitInst_rii(unsigned 
MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode, const TargetRegisterClass *RC, const ConstantFP *FPImm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); if (II.getNumDefs() >= 1) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) @@ -2187,13 +2235,13 @@ unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm) { const MCInstrDesc &II = TII.get(MachineInstOpcode); - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); @@ -2213,9 +2261,9 @@ unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, +Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode, const TargetRegisterClass *RC, uint64_t Imm) { - unsigned ResultReg = createResultReg(RC); + Register ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) @@ -2229,9 +2277,9 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } -unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, +Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { - unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + Register ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); assert(Register::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); @@ -2243,7 +2291,7 @@ unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, /// Emit MachineInstrs to compute the value of Op with all but the least /// significant bit set to zero. -unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { +Register FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); } @@ -2305,7 +2353,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) DbgLoc = Inst->getDebugLoc(); - unsigned Reg = getRegForValue(PHIOp); + Register Reg = getRegForValue(PHIOp); if (!Reg) { FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); return false; @@ -2351,7 +2399,7 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { // Figure out which vreg this is going into. If there is no assigned vreg yet // then there actually was no reference to it. Perhaps the load is referenced // by a dead instruction. 
- unsigned LoadReg = getRegForValue(LI); + Register LoadReg = getRegForValue(LI); if (!LoadReg) return false; @@ -2394,18 +2442,18 @@ MachineMemOperand * FastISel::createMachineMemOperandFor(const Instruction *I) const { const Value *Ptr; Type *ValTy; - unsigned Alignment; + MaybeAlign Alignment; MachineMemOperand::Flags Flags; bool IsVolatile; if (const auto *LI = dyn_cast<LoadInst>(I)) { - Alignment = LI->getAlignment(); + Alignment = LI->getAlign(); IsVolatile = LI->isVolatile(); Flags = MachineMemOperand::MOLoad; Ptr = LI->getPointerOperand(); ValTy = LI->getType(); } else if (const auto *SI = dyn_cast<StoreInst>(I)) { - Alignment = SI->getAlignment(); + Alignment = SI->getAlign(); IsVolatile = SI->isVolatile(); Flags = MachineMemOperand::MOStore; Ptr = SI->getPointerOperand(); @@ -2421,8 +2469,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { AAMDNodes AAInfo; I->getAAMetadata(AAInfo); - if (Alignment == 0) // Ensure that codegen never sees alignment 0. - Alignment = DL.getABITypeAlignment(ValTy); + if (!Alignment) // Ensure that codegen never sees alignment 0. + Alignment = DL.getABITypeAlign(ValTy); unsigned Size = DL.getTypeStoreSize(ValTy); @@ -2436,7 +2484,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { Flags |= MachineMemOperand::MOInvariant; return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, - Alignment, AAInfo, Ranges); + *Alignment, AAInfo, Ranges); } CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const { diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index fa33400cd4b3f..5cf83cff3a903 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/ADT/APInt.h" #include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -85,7 +86,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); - unsigned StackAlign = TFI->getStackAlignment(); DA = DAG->getDivergenceAnalysis(); // Check whether the function can return without sret-demotion. @@ -130,19 +130,31 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines // them. + const Align StackAlign = TFI->getStackAlign(); for (const BasicBlock &BB : *Fn) { for (const Instruction &I : BB) { if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { Type *Ty = AI->getAllocatedType(); - unsigned Align = - std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), - AI->getAlignment()); + Align TyPrefAlign = MF->getDataLayout().getPrefTypeAlign(Ty); + // The "specified" alignment is the alignment written on the alloca, + // or the preferred alignment of the type if none is specified. + // + // (Unspecified alignment on allocas will be going away soon.) 
+ Align SpecifiedAlign = AI->getAlign(); + + // If the preferred alignment of the type is higher than the specified + // alignment of the alloca, promote the alignment, as long as it doesn't + // require realigning the stack. + // + // FIXME: Do we really want to second-guess the IR in isel? + Align Alignment = + std::max(std::min(TyPrefAlign, StackAlign), SpecifiedAlign); // Static allocas can be folded into the initial stack frame // adjustment. For targets that don't realign the stack, don't // do this if there is an extra alignment requirement. if (AI->isStaticAlloca() && - (TFI->isStackRealignable() || (Align <= StackAlign))) { + (TFI->isStackRealignable() || (Alignment <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize(); @@ -154,15 +166,15 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) { FrameIndex = MF->getFrameInfo().CreateFixedObject( TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true); - MF->getFrameInfo().setObjectAlignment(FrameIndex, Align); + MF->getFrameInfo().setObjectAlignment(FrameIndex, Alignment); } else { - FrameIndex = - MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI); + FrameIndex = MF->getFrameInfo().CreateStackObject(TySize, Alignment, + false, AI); } // Scalable vectors may need a special StackID to distinguish // them from other (fixed size) stack objects. - if (Ty->isVectorTy() && Ty->getVectorIsScalable()) + if (isa<ScalableVectorType>(Ty)) MF->getFrameInfo().setStackID(FrameIndex, TFI->getStackIDForScalableVectors()); @@ -176,21 +188,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // FIXME: Overaligned static allocas should be grouped into // a single dynamic allocation instead of using a separate // stack allocation for each one. - if (Align <= StackAlign) - Align = 0; // Inform the Frame Information that we have variable-sized objects. - MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI); + MF->getFrameInfo().CreateVariableSizedObject( + Alignment <= StackAlign ? Align(1) : Alignment, AI); } } // Look for inline asm that clobbers the SP register. - if (isa<CallInst>(I) || isa<InvokeInst>(I)) { - ImmutableCallSite CS(&I); - if (isa<InlineAsm>(CS.getCalledValue())) { + if (auto *Call = dyn_cast<CallBase>(&I)) { + if (Call->isInlineAsm()) { unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); std::vector<TargetLowering::AsmOperandInfo> Ops = - TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); + TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, + *Call); for (TargetLowering::AsmOperandInfo &Op : Ops) { if (Op.Type == InlineAsm::isClobber) { // Clobbers don't have SDValue operands, hence SDValue(). @@ -354,7 +365,7 @@ void FunctionLoweringInfo::clear() { } /// CreateReg - Allocate a single virtual register for the given type. -unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { +Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { return RegInfo->createVirtualRegister( MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); } @@ -366,29 +377,29 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. 
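    // A quick worked example of the alloca-alignment clamp above (illustrative
    // numbers, not from the patch), assuming a 16-byte stack alignment and a
    // type whose preferred alignment is 32:
    //   alloca align 8:  std::max(std::min(Align(32), Align(16)), Align(8))  == Align(16)
    //   alloca align 64: std::max(std::min(Align(32), Align(16)), Align(64)) == Align(64)
    // i.e. the alignment is promoted toward the preferred alignment, but never
    // past what the stack guarantees, and an explicit larger request is kept.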
/// -unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { +Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); - unsigned FirstReg = 0; + Register FirstReg; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT); unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = CreateReg(RegisterVT, isDivergent); + Register R = CreateReg(RegisterVT, isDivergent); if (!FirstReg) FirstReg = R; } } return FirstReg; } -unsigned FunctionLoweringInfo::CreateRegs(const Value *V) { - return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) && - DA->isDivergent(V)); +Register FunctionLoweringInfo::CreateRegs(const Value *V) { + return CreateRegs(V->getType(), DA && DA->isDivergent(V) && + !TLI->requiresUniformRegister(*MF, V)); } /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the @@ -397,7 +408,7 @@ unsigned FunctionLoweringInfo::CreateRegs(const Value *V) { /// the larger bit width by zero extension. The bit width must be no smaller /// than the LiveOutInfo's existing bit width. const FunctionLoweringInfo::LiveOutInfo * -FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { +FunctionLoweringInfo::GetLiveOutRegInfo(Register Reg, unsigned BitWidth) { if (!LiveOutRegInfo.inBounds(Reg)) return nullptr; @@ -407,7 +418,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { if (BitWidth > LOI->Known.getBitWidth()) { LOI->NumSignBits = 1; - LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */); + LOI->Known = LOI->Known.anyext(BitWidth); } return LOI; @@ -431,7 +442,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT); unsigned BitWidth = IntVT.getSizeInBits(); - unsigned DestReg = ValueMap[PN]; + Register DestReg = ValueMap[PN]; if (!Register::isVirtualRegister(DestReg)) return; LiveOutRegInfo.grow(DestReg); @@ -452,7 +463,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { } else { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); - unsigned SrcReg = ValueMap[V]; + Register SrcReg = ValueMap[V]; if (!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; @@ -487,8 +498,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when " "its CopyToReg node was created."); - unsigned SrcReg = ValueMap[V]; - if (!Register::isVirtualRegister(SrcReg)) { + Register SrcReg = ValueMap[V]; + if (!SrcReg.isVirtual()) { DestLOI.IsValid = false; return; } @@ -522,11 +533,11 @@ int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { return INT_MAX; } -unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( +Register FunctionLoweringInfo::getCatchPadExceptionPointerVReg( const Value *CPI, const TargetRegisterClass *RC) { MachineRegisterInfo &MRI = MF->getRegInfo(); auto I = CatchPadExceptionPointers.insert({CPI, 0}); - unsigned &VReg = I.first->second; + Register &VReg = I.first->second; if (I.second) VReg = MRI.createVirtualRegister(RC); assert(VReg && "null vreg in 
exception pointer table!"); @@ -534,7 +545,7 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( } const Value * -FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) { +FunctionLoweringInfo::getValueFromVirtualReg(Register Vreg) { if (VirtReg2Value.empty()) { SmallVector<EVT, 4> ValueVTs; for (auto &P : ValueMap) { diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index c613c25406285..0e4e99214aa24 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/StackMaps.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" @@ -28,6 +29,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "instr-emitter" @@ -84,9 +86,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// implicit physical register output. void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, - unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { - unsigned VRBase = 0; - if (Register::isVirtualRegister(SrcReg)) { + Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) { + Register VRBase; + if (SrcReg.isVirtual()) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) @@ -113,8 +115,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { - unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (Register::isVirtualRegister(DestReg)) { + Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (DestReg.isVirtual()) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) @@ -190,16 +192,19 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && "IMPLICIT_DEF should have been handled as a special case elsewhere!"); unsigned NumResults = CountResults(Node); - for (unsigned i = 0; i < II.getNumDefs(); ++i) { + bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() && + II.isVariadic() && II.variadicOpsAreDefs(); + unsigned NumVRegs = HasVRegVariadicDefs ? NumResults : II.getNumDefs(); + for (unsigned i = 0; i < NumVRegs; ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination // register instead of creating a new vreg. - unsigned VRBase = 0; + Register VRBase; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); // Always let the value type influence the used register class. The @@ -216,10 +221,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, RC = VTRC; } - if (II.OpInfo[i].isOptionalDef()) { + if (II.OpInfo != nullptr && II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. 
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); - assert(Register::isPhysicalRegister(VRBase)); + assert(VRBase.isPhysical()); MIB.addReg(VRBase, RegState::Define); } @@ -263,8 +268,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. -unsigned InstrEmitter::getVR(SDValue Op, - DenseMap<SDValue, unsigned> &VRBaseMap) { +Register InstrEmitter::getVR(SDValue Op, + DenseMap<SDValue, Register> &VRBaseMap) { if (Op.isMachineOpcode() && Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Add an IMPLICIT_DEF instruction before every use. @@ -278,7 +283,7 @@ unsigned InstrEmitter::getVR(SDValue Op, return VReg; } - DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); assert(I != VRBaseMap.end() && "Node emitted out of order - late"); return I->second; } @@ -292,13 +297,13 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"); // Get/emit the operand. - unsigned VReg = getVR(Op, VRBaseMap); + Register VReg = getVR(Op, VRBaseMap); const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && @@ -363,7 +368,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, @@ -373,7 +378,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { MIB.addFPImm(F->getConstantFPValue()); } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { - unsigned VReg = R->getReg(); + Register VReg = R->getReg(); MVT OpVT = Op.getSimpleValueType(); const TargetRegisterClass *IIRC = II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF)) @@ -409,23 +414,14 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { int Offset = CP->getOffset(); - unsigned Align = CP->getAlignment(); - Type *Type = CP->getType(); - // MachineConstantPool wants an explicit alignment. - if (Align == 0) { - Align = MF->getDataLayout().getPrefTypeAlignment(Type); - if (Align == 0) { - // Alignment of vector types. FIXME! 
- Align = MF->getDataLayout().getTypeAllocSize(Type); - } - } + Align Alignment = CP->getAlign(); unsigned Idx; MachineConstantPool *MCP = MF->getConstantPool(); if (CP->isMachineConstantPoolEntry()) - Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); + Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Alignment); else - Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); + Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Alignment); MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); @@ -446,7 +442,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, } } -unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, +Register InstrEmitter::ConstrainForSubReg(Register VReg, unsigned SubIdx, MVT VT, bool isDivergent, const DebugLoc &DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); @@ -473,9 +469,9 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, /// EmitSubregNode - Generate machine code for subreg nodes. /// void InstrEmitter::EmitSubregNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned) { - unsigned VRBase = 0; + Register VRBase; unsigned Opc = Node->getMachineOpcode(); // If the node is only used by a CopyToReg and the dest reg is a vreg, use @@ -483,8 +479,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, for (SDNode *User : Node->uses()) { if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { - unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (Register::isVirtualRegister(DestReg)) { + Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (DestReg.isVirtual()) { VRBase = DestReg; break; } @@ -499,7 +495,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent()); - unsigned Reg; + Register Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); if (R && Register::isPhysicalRegister(R->getReg())) { @@ -510,7 +506,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, DefMI = MRI->getVRegDef(Reg); } - unsigned SrcReg, DstReg, DefSubIdx; + Register SrcReg, DstReg; + unsigned DefSubIdx; if (DefMI && TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && SubIdx == DefSubIdx && @@ -528,19 +525,19 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), Node->isDivergent(), Node->getDebugLoc()); // Create the destreg if it is missing. - if (VRBase == 0) + if (!VRBase) VRBase = MRI->createVirtualRegister(TRC); // Create the extract_subreg machine instruction. 
MachineInstrBuilder CopyMI = BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase); - if (Register::isVirtualRegister(Reg)) + if (Reg.isVirtual()) CopyMI.addReg(Reg, 0, SubIdx); else CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); @@ -606,7 +603,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, /// void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); // Create the new VReg in the destination class and emit a copy. @@ -626,7 +623,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// void InstrEmitter::EmitRegSequence(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); @@ -675,9 +672,9 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, /// MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { MDNode *Var = SD->getVariable(); - const DIExpression *Expr = SD->getExpression(); + MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -701,11 +698,12 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()); - if (SD->isIndirect()) - Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); - - FrameMI.addReg(0); + // Push [fi + 0] onto the DIExpression stack. + FrameMI.addImm(0); + else + // Push fi onto the DIExpression stack. + FrameMI.addReg(0); return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. @@ -719,7 +717,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // they happen and transfer the debug info, but trying to guarantee that // in all cases would be very fragile; this is a safeguard for any // that were missed. - DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); if (I==VRBaseMap.end()) MIB.addReg(0U); // undef else @@ -751,9 +749,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. 
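    // For instance (illustrative MIR, not from this patch), the second DBG_VALUE
    // operand now distinguishes the two location kinds roughly as:
    //   DBG_VALUE $reg, $noreg, !var, !expr   ; the value lives in $reg
    //   DBG_VALUE $reg, 0,      !var, !expr   ; the value lives in memory at [$reg + 0]
    // i.e. an immediate second operand marks an indirect (memory) location and
    // $noreg marks a plain register location.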
if (SD->isIndirect()) - Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); - - MIB.addReg(0U, RegState::Debug); + MIB.addImm(0U); + else + MIB.addReg(0U, RegState::Debug); MIB.addMetadata(Var); MIB.addMetadata(Expr); @@ -780,7 +778,7 @@ InstrEmitter::EmitDbgLabel(SDDbgLabel *SD) { /// void InstrEmitter:: EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { unsigned Opc = Node->getMachineOpcode(); // Handle subreg insert/extract specially @@ -828,7 +826,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned NumImpUses = 0; unsigned NodeOperands = countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); - bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr; + bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() && + II.isVariadic() && II.variadicOpsAreDefs(); + bool HasPhysRegOuts = NumResults > NumDefs && + II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -978,7 +979,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, /// needed dependencies. void InstrEmitter:: EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { switch (Node->getOpcode()) { default: #ifndef NDEBUG @@ -991,7 +992,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { - unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { @@ -1001,7 +1002,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); break; } - unsigned SrcReg; + Register SrcReg; if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) SrcReg = R->getReg(); else diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index cfe99dd977b5b..c3567eae91619 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -17,13 +17,15 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" namespace llvm { class MachineInstrBuilder; class MCInstrDesc; +class SDDbgLabel; class SDDbgValue; +class TargetLowering; class LLVM_LIBRARY_VISIBILITY InstrEmitter { MachineFunction *MF; @@ -39,19 +41,19 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// implicit physical register output. void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, - unsigned SrcReg, - DenseMap<SDValue, unsigned> &VRBaseMap); + Register SrcReg, + DenseMap<SDValue, Register> &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); /// getVR - Return the virtual register corresponding to the specified result /// of the specified node. 
- unsigned getVR(SDValue Op, - DenseMap<SDValue, unsigned> &VRBaseMap); + Register getVR(SDValue Op, + DenseMap<SDValue, Register> &VRBaseMap); /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is @@ -60,7 +62,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// AddOperand - Add the specified operand to the specified machine instr. II @@ -71,18 +73,18 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned); /// ConstrainForSubReg - Try to constrain VReg to a register class that /// supports SubIdx sub-registers. Emit a copy if that isn't possible. /// Return the virtual register to use. - unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT, + Register ConstrainForSubReg(Register VReg, unsigned SubIdx, MVT VT, bool isDivergent, const DebugLoc &DL); /// EmitSubregNode - Generate machine code for subreg nodes. /// - void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + void EmitSubregNode(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned); /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. @@ -90,11 +92,11 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// register is constrained to be in a particular register class. /// void EmitCopyToRegClassNode(SDNode *Node, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. /// - void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, Register> &VRBaseMap, bool IsClone, bool IsCloned); public: /// CountResults - The results of target nodes have register or immediate @@ -105,7 +107,7 @@ public: /// EmitDbgValue - Generate machine instruction for a dbg_value node. /// MachineInstr *EmitDbgValue(SDDbgValue *SD, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); /// Generate machine instruction for a dbg_label node. MachineInstr *EmitDbgLabel(SDDbgLabel *SD); @@ -113,7 +115,7 @@ public: /// EmitNode - Generate machine code for a node and needed dependencies. 
/// void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) { + DenseMap<SDValue, Register> &VRBaseMap) { if (Node->isMachineOpcode()) EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); else @@ -132,9 +134,9 @@ public: private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap); + DenseMap<SDValue, Register> &VRBaseMap); }; } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 80ac8b95e4ef4..6a6004c158bb8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -328,7 +328,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); if (Extend) { SDValue Result = DAG.getExtLoad( ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, @@ -348,7 +348,7 @@ SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { EVT VT = CP->getValueType(0); SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); SDValue Result = DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment); @@ -387,7 +387,9 @@ SDValue SelectionDAGLegalize::PerformInsertVectorEltInMemory(SDValue Vec, SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, Tmp3); // Store the scalar value. - Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT); + Ch = DAG.getTruncStore( + Ch, dl, Tmp2, StackPtr2, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( DAG.getMachineFunction(), SPFI)); @@ -434,7 +436,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { // We generally can't do this one for long doubles. SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDLoc dl(ST); @@ -444,8 +445,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF(). bitcastToAPInt().zextOrTrunc(32), SDLoc(CFP), MVT::i32); - return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), Alignment, - MMOFlags, AAInfo); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); } if (CFP->getValueType(0) == MVT::f64) { @@ -454,7 +455,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
zextOrTrunc(64), SDLoc(CFP), MVT::i64); return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); } if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { @@ -467,12 +468,12 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { if (DAG.getDataLayout().isBigEndian()) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), Alignment, - MMOFlags, AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); Ptr = DAG.getMemBasePlusOffset(Ptr, 4, dl); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), - MinAlign(Alignment, 4U), MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -487,7 +488,6 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Ptr = ST->getBasePtr(); SDLoc dl(Node); - unsigned Alignment = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); @@ -528,9 +528,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { assert(NVT.getSizeInBits() == VT.getSizeInBits() && "Can only promote stores to same size type"); Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); - SDValue Result = - DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); break; } @@ -553,7 +552,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. @@ -575,7 +574,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) // Store the bottom RoundWidth bits. Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); + RoundVT, ST->getOriginalAlign(), MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; @@ -584,10 +583,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore( - Chain, dl, Hi, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo); } else { // Big endian - avoid unaligned stores. 
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X @@ -596,18 +594,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(ExtraWidth, dl, TLI.getShiftAmountTy(Value.getValueType(), DL))); - Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), - RoundVT, Alignment, MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), RoundVT, + ST->getOriginalAlign(), MMOFlags, AAInfo); // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); - Lo = DAG.getTruncStore( - Chain, dl, Value, Ptr, - ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, ST->getOriginalAlign(), MMOFlags, AAInfo); } // The order of the stores doesn't matter. @@ -643,15 +640,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { if (TLI.isTypeLegal(StVT)) { Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - Alignment, MMOFlags, AAInfo); + ST->getOriginalAlign(), MMOFlags, AAInfo); } else { // The in-memory type isn't legal. Truncate to the type it would promote // to, and then do a truncstore. Value = DAG.getNode(ISD::TRUNCATE, dl, TLI.getTypeToTransformTo(*DAG.getContext(), StVT), Value); - Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), - StVT, Alignment, MMOFlags, AAInfo); + Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), StVT, + ST->getOriginalAlign(), MMOFlags, AAInfo); } ReplaceNode(SDValue(Node, 0), Result); @@ -721,7 +719,6 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { LLVM_DEBUG(dbgs() << "Legalizing extending load operation\n"); EVT SrcVT = LD->getMemoryVT(); unsigned SrcWidth = SrcVT.getSizeInBits(); - unsigned Alignment = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); @@ -748,9 +745,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), NVT, Alignment, MMOFlags, AAInfo); + SDValue Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), NVT, + LD->getOriginalAlign(), MMOFlags, AAInfo); Ch = Result.getValue(1); // The chain. @@ -788,16 +785,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, - AAInfo); + LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, - AAInfo); + ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -817,16 +813,15 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, - LD->getPointerInfo(), RoundVT, Alignment, MMOFlags, - AAInfo); + LD->getPointerInfo(), RoundVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - ExtraVT, MinAlign(Alignment, IncrementSize), MMOFlags, - AAInfo); + ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of // the other one. @@ -933,7 +928,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); else - ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT); Value = ValRes; Chain = Result.getValue(1); break; @@ -1009,6 +1004,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; + case ISD::STRICT_FP_TO_FP16: case ISD::STRICT_SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: case ISD::STRICT_LRINT: @@ -1131,7 +1127,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: - case ISD::UDIVFIX: { + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1383,19 +1381,26 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { SDValue SubStackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); // Store the subvector. - Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, MachinePointerInfo()); + Ch = DAG.getStore( + Ch, dl, Part, SubStackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { + assert((Node->getOpcode() == ISD::BUILD_VECTOR || + Node->getOpcode() == ISD::CONCAT_VECTORS) && + "Unexpected opcode!"); + // We can't handle this case efficiently. Allocate a sufficiently - // aligned object on the stack, store each element into it, then load + // aligned object on the stack, store each operand into it, then load // the result as a vector. // Create the stack frame object. EVT VT = Node->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + EVT MemVT = isa<BuildVectorSDNode>(Node) ? 
VT.getVectorElementType() + : Node->getOperand(0).getValueType(); SDLoc dl(Node); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); @@ -1404,7 +1409,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Emit a store of each element to the stack slot. SmallVector<SDValue, 8> Stores; - unsigned TypeByteSize = EltVT.getSizeInBits() / 8; + unsigned TypeByteSize = MemVT.getSizeInBits() / 8; assert(TypeByteSize > 0 && "Vector element type too small for stack store!"); // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { @@ -1413,16 +1418,15 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; - SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); - Idx = DAG.getMemBasePlusOffset(FIPtr, Idx, dl); + SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, Offset, dl); // If the destination vector element type is narrower than the source // element type, only store the bits necessary. - if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { + if (MemVT.bitsLT(Node->getOperand(i).getValueType())) Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, - PtrInfo.getWithOffset(Offset), EltVT)); - } else + PtrInfo.getWithOffset(Offset), MemVT)); + else Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, PtrInfo.getWithOffset(Offset))); } @@ -1600,13 +1604,17 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); - unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - unsigned StackAlign = - DAG.getSubtarget().getFrameLowering()->getStackAlignment(); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value - if (Align > StackAlign) + Align Alignment = cast<ConstantSDNode>(Tmp3)->getAlignValue(); + const TargetFrameLowering *TFL = DAG.getSubtarget().getFrameLowering(); + unsigned Opc = + TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp ? + ISD::ADD : ISD::SUB; + + Align StackAlign = TFL->getStackAlign(); + Tmp1 = DAG.getNode(Opc, dl, VT, SP, Size); // Value + if (Alignment > StackAlign) Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, dl, VT)); + DAG.getConstant(-Alignment.value(), dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), @@ -1968,7 +1976,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { Constant *CP = ConstantVector::get(CV); SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); return DAG.getLoad( VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), @@ -2360,36 +2368,34 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, // Get the stack frame index of a 8 byte buffer. 
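The hunk that follows reworks the classic bit-pattern trick for converting i32/u32 to f64: the 32-bit input becomes the low word of a stack f64 whose high word is 0x43300000 (the top half of the bit pattern of 2^52), and subtracting the matching bias recovers the value exactly. A scalar C++ sketch of the underlying arithmetic (illustrative only; the bit pattern is built arithmetically here, so host endianness does not matter):

  #include <cstdint>
  #include <cstring>

  double u32_to_f64(uint32_t x) {
    uint64_t bits = 0x4330000000000000ULL | x;   // hi word 0x43300000, lo word x
    double d;
    std::memcpy(&d, &bits, sizeof(d));           // d == 2^52 + x, exactly
    return d - 4503599627370496.0;               // subtract the bias 2^52
  }

  double i32_to_f64(int32_t x) {
    // Flip the sign bit so the stored word is x + 2^31, then remove the
    // combined bias 2^52 + 2^31 afterwards.
    uint64_t bits = 0x4330000000000000ULL | (uint32_t(x) ^ 0x80000000u);
    double d;
    std::memcpy(&d, &bits, sizeof(d));
    return d - 4503601774854144.0;               // 2^52 + 2^31
  }
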
SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); - // word offset constant for Hi/Lo address computation - SDValue WordOff = DAG.getConstant(sizeof(int), dl, - StackSlot.getValueType()); - // set up Hi and Lo (into buffer) address based on endian - SDValue Hi = StackSlot; - SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), - StackSlot, WordOff); - if (DAG.getDataLayout().isLittleEndian()) - std::swap(Hi, Lo); - + SDValue Lo = Op0; // if signed map to unsigned space - SDValue Op0Mapped; if (isSigned) { - // constant used to invert sign bit (signed to unsigned mapping) - SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); - Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); - } else { - Op0Mapped = Op0; + // Invert sign bit (signed to unsigned mapping). + Lo = DAG.getNode(ISD::XOR, dl, MVT::i32, Lo, + DAG.getConstant(0x80000000u, dl, MVT::i32)); } - // store the lo of the constructed double - based on integer input - SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op0Mapped, Lo, + // Initial hi portion of constructed double. + SDValue Hi = DAG.getConstant(0x43300000u, dl, MVT::i32); + + // If this a big endian target, swap the lo and high data. + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + SDValue MemChain = DAG.getEntryNode(); + + // Store the lo of the constructed double. + SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot, MachinePointerInfo()); - // initial hi portion of constructed double - SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); - // store the hi of the constructed double - biased exponent + // Store the hi of the constructed double. + SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, 4, dl); SDValue Store2 = - DAG.getStore(Store1, dl, InitialHi, Hi, MachinePointerInfo()); + DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo()); + MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + // load the constructed double SDValue Load = - DAG.getLoad(MVT::f64, dl, Store2, StackSlot, MachinePointerInfo()); + DAG.getLoad(MVT::f64, dl, MemChain, StackSlot, MachinePointerInfo()); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? BitsToDouble(0x4330000080000000ULL) : @@ -2417,10 +2423,65 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, } return Result; } - assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); // Code below here assumes !isSigned without checking again. - // FIXME: This can produce slightly incorrect results. See details in - // FIXME: https://reviews.llvm.org/D69275 + assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + + // TODO: Generalize this for use with other types. + if ((SrcVT == MVT::i32 || SrcVT == MVT::i64) && DestVT == MVT::f32) { + LLVM_DEBUG(dbgs() << "Converting unsigned i32/i64 to f32\n"); + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundisf in compiler_rt. That method + // should be valid for i32->f32 as well. + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + // pseudo-op, or, even better, for whole-function isel. 
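The comment block above sketches the compiler-rt __floatundisf approach that the code below emits: when the top bit of the unsigned input is set, halve it with round-to-odd, convert as signed, then double the result. A scalar version (illustrative only, not part of the patch):

  #include <cstdint>

  float u64_to_f32(uint64_t x) {
    if ((int64_t)x >= 0)                  // sign bit clear: the plain signed convert is fine
      return (float)(int64_t)x;
    // Halve with round-to-odd so the final doubling rounds the same way a
    // direct conversion would, then convert as signed and scale back up.
    uint64_t halved = (x >> 1) | (x & 1);
    float f = (float)(int64_t)halved;
    return f + f;
  }
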
+ EVT SetCCVT = getSetCCResultType(SrcVT); + + SDValue SignBitTest = DAG.getSetCC( + dl, SetCCVT, Op0, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); + + EVT ShiftVT = TLI.getShiftAmountTy(SrcVT, DAG.getDataLayout()); + SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); + SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Op0, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, SrcVT); + SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Op0, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); + + SDValue Slow, Fast; + if (Node->isStrictFPOpcode()) { + // In strict mode, we must avoid spurious exceptions, and therefore + // must make sure to only emit a single STRICT_SINT_TO_FP. + SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Op0); + Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DestVT, MVT::Other }, + { Node->getOperand(0), InCvt }); + Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DestVT, MVT::Other }, + { Fast.getValue(1), Fast, Fast }); + Chain = Slow.getValue(1); + // The STRICT_SINT_TO_FP inherits the exception mode from the + // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can + // never raise any exception. + SDNodeFlags Flags; + Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); + Fast->setFlags(Flags); + Flags.setNoFPExcept(true); + Slow->setFlags(Flags); + } else { + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Or); + Slow = DAG.getNode(ISD::FADD, dl, DestVT, SignCvt, SignCvt); + Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + } + + return DAG.getSelect(dl, DestVT, SignBitTest, Slow, Fast); + } + + // The following optimization is valid only if every value in SrcVT (when + // treated as signed) is representable in DestVT. Check that the mantissa + // size of DestVT is >= than the number of bits in SrcVT -1. 
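For example, u32 -> f64 satisfies this precondition (f64 carries 53 bits of precision and 53 >= 32 - 1), so the signed conversion plus fudge factor below is exact; u32 -> f32, with only 24 bits of precision, does not, which is why the f32 destinations are peeled off by the path added just above.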
+ assert(APFloat::semanticsPrecision(DAG.EVTToAPFloatSemantics(DestVT)) >= + SrcVT.getSizeInBits() - 1 && + "Cannot perform lossless SINT_TO_FP!"); SDValue Tmp1; if (Node->isStrictFPOpcode()) { @@ -2454,9 +2515,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout())); - unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + Align Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlign(); CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); - Alignment = std::min(Alignment, 4u); + Alignment = commonAlignment(Alignment, 4); SDValue FudgeInReg; if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad( @@ -2765,6 +2826,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::FLT_ROUNDS_: Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); break; case ISD::EH_RETURN: case ISD::EH_LABEL: @@ -3090,14 +3152,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } unsigned Idx = Mask[i]; if (Idx < NumElems) - Ops.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, + DAG.getVectorIdxConstant(Idx, dl))); else - Ops.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, - DAG.getConstant(Idx - NumElems, dl, - TLI.getVectorIdxTy(DAG.getDataLayout())))); + Ops.push_back( + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, + DAG.getVectorIdxConstant(Idx - NumElems, dl))); } Tmp1 = DAG.getBuildVector(VT, dl, Ops); @@ -3219,6 +3279,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); } break; + case ISD::STRICT_FP16_TO_FP: + if (Node->getValueType(0) != MVT::f32) { + // We can extend to types bigger than f32 in two steps without changing + // the result. Since "f16 -> f32" is much more commonly available, give + // CodeGen the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::STRICT_FP16_TO_FP, dl, {MVT::f32, MVT::Other}, + {Node->getOperand(0), Node->getOperand(1)}); + Res = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, + {Node->getValueType(0), MVT::Other}, + {Res.getValue(1), Res}); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + } + break; case ISD::FP_TO_FP16: LLVM_DEBUG(dbgs() << "Legalizing FP_TO_FP16\n"); if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { @@ -3273,26 +3348,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::UREM: - case ISD::SREM: { - EVT VT = Node->getValueType(0); - bool isSigned = Node->getOpcode() == ISD::SREM; - unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; - unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; - Tmp2 = Node->getOperand(0); - Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { - SDVTList VTs = DAG.getVTList(VT, VT); - Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); - Results.push_back(Tmp1); - } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { - // X % Y -> X-X/Y*Y - Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); - Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); + case ISD::SREM: + if (TLI.expandREM(Node, Tmp1, DAG)) Results.push_back(Tmp1); - } break; - } case ISD::UDIV: case ISD::SDIV: { bool isSigned = Node->getOpcode() == ISD::SDIV; @@ -3420,7 +3479,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; case ISD::SDIVFIX: + case ISD::SDIVFIXSAT: case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node), Node->getOperand(0), Node->getOperand(1), @@ -3457,8 +3518,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); // Add of the sum and the carry. + SDValue One = DAG.getConstant(1, dl, VT); SDValue CarryExt = - DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1); + DAG.getNode(ISD::AND, dl, VT, DAG.getZExtOrTrunc(Carry, dl, VT), One); SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt); // Second check for overflow. If we are adding, we can only overflow if the @@ -3780,12 +3842,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SmallVector<SDValue, 8> Scalars; for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0), - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue Sh = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1), - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Ex = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), + Node->getOperand(0), DAG.getVectorIdxConstant(Idx, dl)); + SDValue Sh = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), + Node->getOperand(1), DAG.getVectorIdxConstant(Idx, dl)); Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, VT.getScalarType(), Ex, Sh)); } @@ -3867,7 +3929,6 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { SmallVector<SDValue, 8> Results; SDLoc dl(Node); // FIXME: Check flags on the node to see if we can use a finite call. 
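In the UADDO/USUBO carry expansion a few hunks above, the incoming carry is now masked with an AND against 1 rather than a zero-extend-in-reg to i1 before being added to the partial sum. A scalar sketch of the add flavour of that expansion (illustrative only):

  #include <cstdint>

  uint32_t addcarry32(uint32_t a, uint32_t b, uint32_t cin, uint32_t *cout) {
    uint32_t sum = a + b;
    uint32_t ovf1 = sum < a;        // first overflow check: sum vs. LHS
    uint32_t carry = cin & 1;       // mask the carry to a single bit
    uint32_t sum2 = sum + carry;
    uint32_t ovf2 = sum2 < sum;     // second overflow check after adding the carry
    *cout = ovf1 | ovf2;            // at most one of the two can be set
    return sum2;
  }
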
- bool CanUseFiniteLibCall = TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath; unsigned Opc = Node->getOpcode(); switch (Opc) { case ISD::ATOMIC_FENCE: { @@ -3976,68 +4037,28 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { break; case ISD::FLOG: case ISD::STRICT_FLOG: - if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log_finite)) - ExpandFPLibCall(Node, RTLIB::LOG_FINITE_F32, - RTLIB::LOG_FINITE_F64, - RTLIB::LOG_FINITE_F80, - RTLIB::LOG_FINITE_F128, - RTLIB::LOG_FINITE_PPCF128, Results); - else - ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_F128, - RTLIB::LOG_PPCF128, Results); + ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, + RTLIB::LOG_F128, RTLIB::LOG_PPCF128, Results); break; case ISD::FLOG2: case ISD::STRICT_FLOG2: - if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log2_finite)) - ExpandFPLibCall(Node, RTLIB::LOG2_FINITE_F32, - RTLIB::LOG2_FINITE_F64, - RTLIB::LOG2_FINITE_F80, - RTLIB::LOG2_FINITE_F128, - RTLIB::LOG2_FINITE_PPCF128, Results); - else - ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_F128, - RTLIB::LOG2_PPCF128, Results); + ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, + RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128, Results); break; case ISD::FLOG10: case ISD::STRICT_FLOG10: - if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_log10_finite)) - ExpandFPLibCall(Node, RTLIB::LOG10_FINITE_F32, - RTLIB::LOG10_FINITE_F64, - RTLIB::LOG10_FINITE_F80, - RTLIB::LOG10_FINITE_F128, - RTLIB::LOG10_FINITE_PPCF128, Results); - else - ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_F128, - RTLIB::LOG10_PPCF128, Results); + ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, + RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128, Results); break; case ISD::FEXP: case ISD::STRICT_FEXP: - if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp_finite)) - ExpandFPLibCall(Node, RTLIB::EXP_FINITE_F32, - RTLIB::EXP_FINITE_F64, - RTLIB::EXP_FINITE_F80, - RTLIB::EXP_FINITE_F128, - RTLIB::EXP_FINITE_PPCF128, Results); - else - ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_F128, - RTLIB::EXP_PPCF128, Results); + ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, + RTLIB::EXP_F128, RTLIB::EXP_PPCF128, Results); break; case ISD::FEXP2: case ISD::STRICT_FEXP2: - if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_exp2_finite)) - ExpandFPLibCall(Node, RTLIB::EXP2_FINITE_F32, - RTLIB::EXP2_FINITE_F64, - RTLIB::EXP2_FINITE_F80, - RTLIB::EXP2_FINITE_F128, - RTLIB::EXP2_FINITE_PPCF128, Results); - else - ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_F128, - RTLIB::EXP2_PPCF128, Results); + ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, + RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128, Results); break; case ISD::FTRUNC: case ISD::STRICT_FTRUNC: @@ -4079,6 +4100,14 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128, Results); break; + case ISD::FROUNDEVEN: + case ISD::STRICT_FROUNDEVEN: + ExpandFPLibCall(Node, RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + RTLIB::ROUNDEVEN_F80, + RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128, Results); + break; case ISD::FPOWI: case ISD::STRICT_FPOWI: { RTLIB::Libcall LC; @@ -4107,16 +4136,8 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { } case ISD::FPOW: case 
ISD::STRICT_FPOW: - if (CanUseFiniteLibCall && DAG.getLibInfo().has(LibFunc_pow_finite)) - ExpandFPLibCall(Node, RTLIB::POW_FINITE_F32, - RTLIB::POW_FINITE_F64, - RTLIB::POW_FINITE_F80, - RTLIB::POW_FINITE_F128, - RTLIB::POW_FINITE_PPCF128, Results); - else - ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_F128, - RTLIB::POW_PPCF128, Results); + ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, + RTLIB::POW_F128, RTLIB::POW_PPCF128, Results); break; case ISD::LROUND: case ISD::STRICT_LROUND: @@ -4181,6 +4202,17 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); } break; + case ISD::STRICT_FP16_TO_FP: { + if (Node->getValueType(0) == MVT::f32) { + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall( + DAG, RTLIB::FPEXT_F16_F32, MVT::f32, Node->getOperand(1), CallOptions, + SDLoc(Node), Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + } + break; + } case ISD::FP_TO_FP16: { RTLIB::Libcall LC = RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); @@ -4188,6 +4220,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { Results.push_back(ExpandLibCall(LC, Node, false)); break; } + case ISD::STRICT_FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(1).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Unable to expand strict_fp_to_fp16"); + TargetLowering::MakeLibCallOptions CallOptions; + std::pair<SDValue, SDValue> Tmp = + TLI.makeLibCall(DAG, LC, Node->getValueType(0), Node->getOperand(1), + CallOptions, SDLoc(Node), Node->getOperand(0)); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } case ISD::FSUB: case ISD::STRICT_FSUB: ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, @@ -4289,8 +4334,13 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: - // Zero extend the argument. - Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Zero extend the argument unless its cttz, then use any_extend. + if (Node->getOpcode() == ISD::CTTZ || + Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF) + Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0)); + else + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + if (Node->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. 
This can be handled by setting the bit just off @@ -4552,6 +4602,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FTRUNC: case ISD::FNEG: case ISD::FSQRT: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index f191160dee4fc..7e8ad28f9b143 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -113,6 +113,8 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; case ISD::STRICT_FROUND: case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::STRICT_FROUNDEVEN: + case ISD::FROUNDEVEN: R = SoftenFloatRes_FROUNDEVEN(N); break; case ISD::STRICT_FSIN: case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::STRICT_FSQRT: @@ -125,6 +127,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::FREEZE: R = SoftenFloatRes_FREEZE(N); break; case ISD::STRICT_SINT_TO_FP: case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: @@ -184,6 +187,12 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) { + EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::FREEZE, SDLoc(N), Ty, + GetSoftenedFloat(N->getOperand(0))); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -609,6 +618,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { RTLIB::ROUND_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUNDEVEN(SDNode *N) { + return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + RTLIB::ROUNDEVEN_F80, + RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, @@ -658,8 +676,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->getAlignment(), MMOFlags, - L->getAAInfo()); + L->getPointerInfo(), NVT, L->getOriginalAlign(), + MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -669,8 +687,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Do a non-extending load followed by FP_EXTEND. NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), L->getMemoryVT(), L->getAlignment(), - MMOFlags, L->getAAInfo()); + L->getPointerInfo(), L->getMemoryVT(), + L->getOriginalAlign(), MMOFlags, L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -1166,10 +1184,13 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::STRICT_FPOWI: case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FREEZE: ExpandFloatRes_FREEZE(N, Lo, Hi); break; case ISD::STRICT_FRINT: case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; case ISD::STRICT_FROUND: case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::STRICT_FROUNDEVEN: + case ISD::FROUNDEVEN: ExpandFloatRes_FROUNDEVEN(N, Lo, Hi); break; case ISD::STRICT_FSIN: case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::STRICT_FSQRT: @@ -1459,6 +1480,17 @@ void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, RTLIB::POWI_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FREEZE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + + SDLoc dl(N); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FREEZE, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FREEZE, dl, Hi.getValueType(), Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Binary(N, GetFPLibCall(N->getValueType(0), @@ -1485,6 +1517,16 @@ void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, RTLIB::ROUND_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FROUNDEVEN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), + RTLIB::ROUNDEVEN_F32, + RTLIB::ROUNDEVEN_F64, + RTLIB::ROUNDEVEN_F80, + RTLIB::ROUNDEVEN_F128, + RTLIB::ROUNDEVEN_PPCF128), Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -2117,6 +2159,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -2328,12 +2371,10 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { // Load the value as an integer value with the same number of bits. EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); - SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT, - SDLoc(N), L->getChain(), L->getBasePtr(), - L->getOffset(), L->getPointerInfo(), IVT, - L->getAlignment(), - L->getMemOperand()->getFlags(), - L->getAAInfo()); + SDValue newL = DAG.getLoad( + L->getAddressingMode(), L->getExtensionType(), IVT, SDLoc(N), + L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), IVT, + L->getOriginalAlign(), L->getMemOperand()->getFlags(), L->getAAInfo()); // Legalize the chain result by replacing uses of the old value chain with the // new one ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); @@ -2412,3 +2453,421 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { } +//===----------------------------------------------------------------------===// +// Half Result Soft Promotion +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { + LLVM_DEBUG(dbgs() << "Soft promote half result " << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"); + SDValue R = SDValue(); + + // See if the target wants to custom expand this node. 
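The soft-promotion scheme added in this section keeps every f16 value in an i16 register as its raw bit pattern and only widens to f32 around each individual operation. A minimal sketch of what one softened f16 add amounts to; fp16_to_fp32 and fp32_to_fp16 are hypothetical stand-ins for the ISD::FP16_TO_FP and ISD::FP_TO_FP16 nodes, not real APIs:

  #include <cstdint>

  // Placeholder conversions standing in for the FP16_TO_FP / FP_TO_FP16 nodes.
  float fp16_to_fp32(uint16_t bits);
  uint16_t fp32_to_fp16(float f);

  // Conceptually what SoftPromoteHalfRes_BinOp builds for an f16 FADD:
  uint16_t soft_fadd_f16(uint16_t a_bits, uint16_t b_bits) {
    float a = fp16_to_fp32(a_bits);   // promote both operands to f32
    float b = fp16_to_fp32(b_bits);
    return fp32_to_fp16(a + b);       // do the op in f32, narrow the bits back to i16
  }
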
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftPromoteHalfResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soft promote this operator's result!"); + + case ISD::BITCAST: R = SoftPromoteHalfRes_BITCAST(N); break; + case ISD::ConstantFP: R = SoftPromoteHalfRes_ConstantFP(N); break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FCOPYSIGN: R = SoftPromoteHalfRes_FCOPYSIGN(N); break; + case ISD::STRICT_FP_ROUND: + case ISD::FP_ROUND: R = SoftPromoteHalfRes_FP_ROUND(N); break; + + // Unary FP Operations + case ISD::FABS: + case ISD::FCBRT: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FREEZE: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FROUNDEVEN: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::FCANONICALIZE: R = SoftPromoteHalfRes_UnaryOp(N); break; + + // Binary FP Operations + case ISD::FADD: + case ISD::FDIV: + case ISD::FMAXIMUM: + case ISD::FMINIMUM: + case ISD::FMAXNUM: + case ISD::FMINNUM: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: R = SoftPromoteHalfRes_BinOp(N); break; + + case ISD::FMA: // FMA is same as FMAD + case ISD::FMAD: R = SoftPromoteHalfRes_FMAD(N); break; + + case ISD::FPOWI: R = SoftPromoteHalfRes_FPOWI(N); break; + + case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break; + case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftPromoteHalfRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftPromoteHalfRes_UNDEF(N); break; + case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; + } + + if (R.getNode()) + SetSoftPromotedHalf(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BITCAST(SDNode *N) { + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ConstantFP(SDNode *N) { + ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); + + // Get the (bit-cast) APInt of the APFloat and build an integer constant + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), + MVT::i16); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + NewOp.getValueType().getVectorElementType(), NewOp, + N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) { + SDValue LHS = GetSoftPromotedHalf(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + SDLoc dl(N); + + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode( + ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, + TLI.getShiftAmountTy(RVT, DAG.getDataLayout()))); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. 
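When both operands are already f16-sized bit patterns, the sign transfer built here boils down to plain masking; the shift and extend code below only exists to line the widths up when the sign operand is a wider float. A same-width scalar sketch (illustrative only):

  #include <cstdint>

  uint16_t copysign_f16_bits(uint16_t mag, uint16_t sgn) {
    uint16_t sign_bit = sgn & 0x8000u;              // isolate the sign of the second operand
    return (uint16_t)((mag & 0x7FFFu) | sign_bit);  // clear the first operand's sign, OR it in
  }
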
+ int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = + DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = + DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode( + ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, + TLI.getShiftAmountTy(LVT, DAG.getDataLayout()))); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. + return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); + SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); + SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); + SDLoc dl(N); + + // Promote to the larger FP type. + Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + Op2 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op2); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2); + + // Convert back to FP16 as an integer. + return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); + SDValue Op1 = N->getOperand(1); + SDLoc dl(N); + + Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1); + + // Convert back to FP16 as an integer. + return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { + if (N->isStrictFPOpcode()) { + SDValue Res = + DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other}, + {N->getOperand(0), N->getOperand(1)}); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; + } + + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + + // Load the value as an integer value with the same number of bits. 
+ assert(L->getExtensionType() == ISD::NON_EXTLOAD && "Unexpected extension!"); + SDValue NewL = + DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), MVT::i16, + SDLoc(N), L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), MVT::i16, L->getOriginalAlign(), + L->getMemOperand()->getFlags(), L->getAAInfo()); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT(SDNode *N) { + SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); + SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); + return DAG.getSelect(SDLoc(N), Op1.getValueType(), N->getOperand(0), Op1, + Op2); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) { + SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); + SDValue Op3 = GetSoftPromotedHalf(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), Op2.getValueType(), + N->getOperand(0), N->getOperand(1), Op2, Op3, + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); + + // Round the value to the softened type. + return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(MVT::i16); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Promote to the larger FP type. + Op = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op); + + // Convert back to FP16 as an integer. + return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); + SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); + SDLoc dl(N); + + // Promote to the larger FP type. + Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1); + + // Convert back to FP16 as an integer. + return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); +} + +//===----------------------------------------------------------------------===// +// Half Operand Soft Promotion +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { + LLVM_DEBUG(dbgs() << "Soft promote half operand " << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { + LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); + return false; + } + + // Nodes that use a promotion-requiring floating point operand, but doesn't + // produce a soft promotion-requiring floating point result, need to be + // legalized to use the soft promoted float operand. Nodes that produce at + // least one soft promotion-requiring floating point result have their + // operands legalized as a part of PromoteFloatResult. 
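Operand-only users of f16 go the other way: widen the i16 bits to the larger FP type, then run the ordinary operation on that. A sketch for the FP_TO_SINT/FP_TO_UINT case, reusing the hypothetical fp16_to_fp32 helper from the earlier sketch (illustrative only):

  #include <cstdint>

  float fp16_to_fp32(uint16_t bits);      // placeholder for ISD::FP16_TO_FP

  int32_t f16_bits_to_int(uint16_t bits) {
    float widened = fp16_to_fp32(bits);   // widen the soft-promoted operand first
    return (int32_t)widened;              // then the ordinary f32 -> i32 conversion runs
  }
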
+ switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "SoftPromoteHalfOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to soft promote this operator's operand!"); + + case ISD::BITCAST: Res = SoftPromoteHalfOp_BITCAST(N); break; + case ISD::FCOPYSIGN: Res = SoftPromoteHalfOp_FCOPYSIGN(N, OpNo); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = SoftPromoteHalfOp_FP_TO_XINT(N); break; + case ISD::STRICT_FP_EXTEND: + case ISD::FP_EXTEND: Res = SoftPromoteHalfOp_FP_EXTEND(N); break; + case ISD::SELECT_CC: Res = SoftPromoteHalfOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = SoftPromoteHalfOp_SETCC(N); break; + case ISD::STORE: Res = SoftPromoteHalfOp_STORE(N, OpNo); break; + } + + if (!Res.getNode()) + return false; + + assert(Res.getNode() != N && "Expected a new node!"); + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_BITCAST(SDNode *N) { + SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); + + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, + unsigned OpNo) { + assert(OpNo == 1 && "Only Operand 1 must need promotion here"); + SDValue Op1 = N->getOperand(1); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType()); + + Op1 = GetSoftPromotedHalf(Op1); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + + return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0), + Op1); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0)); + + if (IsStrict) { + SDValue Res = + DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N), + {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op}); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) { + SDValue Op = N->getOperand(0); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + + Op = GetSoftPromotedHalf(Op); + + SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + + return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N, + unsigned OpNo) { + assert(OpNo == 0 && "Can only soften the comparison values"); + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType()); + + Op0 = GetSoftPromotedHalf(Op0); + Op1 = GetSoftPromotedHalf(Op1); + + // Promote to the larger FP type. 
+ Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1, + N->getOperand(2), N->getOperand(3), N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) { + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType()); + + Op0 = GetSoftPromotedHalf(Op0); + Op1 = GetSoftPromotedHalf(Op1); + + // Promote to the larger FP type. + Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + + return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode); +} + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can only soften the stored value!"); + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + SDLoc dl(N); + + assert(!ST->isTruncatingStore() && "Unexpected truncating store."); + SDValue Promoted = GetSoftPromotedHalf(Val); + return DAG.getStore(ST->getChain(), dl, Promoted, ST->getBasePtr(), + ST->getMemOperand()); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 0e46f8d68f839..74071f763dbf0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -91,6 +91,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + case ISD::VSCALE: Res = PromoteIntRes_VSCALE(N); break; case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; @@ -161,7 +162,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; case ISD::SDIVFIX: - case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break; + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break; case ISD::ABS: Res = PromoteIntRes_ABS(N); break; @@ -198,6 +201,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VECREDUCE_UMIN: Res = PromoteIntRes_VECREDUCE(N); break; + + case ISD::FREEZE: + Res = PromoteIntRes_FREEZE(N); + break; } // If the result is null then the sub-method took care of registering it. @@ -271,8 +278,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, return Res.getValue(1); } - SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + // Op2 is used for the comparison and thus must be extended according to the + // target's atomic operations. Op3 is merely stored and so can be left alone. 
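A concrete case for the comment above: if an i8 compare-and-swap is promoted to i32 on a target whose atomic loads sign-extend sub-word values, the expected value 0x80 has to be presented as 0xFFFFFF80 rather than 0x00000080, or the comparison can never succeed. Sketch (illustrative only):

  #include <cstdint>

  // Promoting the expected value of an i8 compare-and-swap to i32.
  int32_t promote_expected_i8(uint8_t expected, bool target_sign_extends) {
    return target_sign_extends ? (int32_t)(int8_t)expected   // e.g. 0x80 -> 0xFFFFFF80
                               : (int32_t)expected;          // e.g. 0x80 -> 0x00000080
  }
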
+ SDValue Op2 = N->getOperand(2); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + switch (TLI.getExtendForAtomicCmpSwapArg()) { + case ISD::SIGN_EXTEND: + Op2 = SExtPromotedInteger(Op2); + break; + case ISD::ZERO_EXTEND: + Op2 = ZExtPromotedInteger(Op2); + break; + case ISD::ANY_EXTEND: + Op2 = GetPromotedInteger(Op2); + break; + default: + llvm_unreachable("Invalid atomic op extension"); + } + SDVTList VTs = DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); SDValue Res = DAG.getAtomicCmpSwap( @@ -303,6 +326,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { case TargetLowering::TypeSoftenFloat: // Promote the integer operand by hand. return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case TargetLowering::TypeSoftPromoteHalf: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftPromotedHalf(InOp)); case TargetLowering::TypePromoteFloat: { // Convert the promoted float by hand. if (!NOutVT.isVector()) @@ -318,6 +344,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, BitConvertToInteger(GetScalarizedVector(InOp))); break; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error("Scalarization of scalable vectors is not supported."); case TargetLowering::TypeSplitVector: { if (!NOutVT.isVector()) { // For example, i32 = BITCAST v2i16 on alpha. Convert the split @@ -370,9 +398,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { OutVT.getVectorNumElements() * Scale); if (isTypeLegal(WideOutVT)) { InOp = DAG.getBitcast(WideOutVT, GetWidenedVector(InOp)); - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, InOp, - DAG.getConstant(0, dl, IdxTy)); + DAG.getVectorIdxConstant(0, dl)); return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp); } } @@ -396,6 +423,12 @@ static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, return ShiftVT; } +SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) { + SDValue V = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::FREEZE, SDLoc(N), + V.getValueType(), V); +} + SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); @@ -558,7 +591,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - return DAG.getNode(N->getOpcode(), dl, NVT); + SDValue Res = + DAG.getNode(N->getOpcode(), dl, {NVT, MVT::Other}, N->getOperand(0)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; } SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { @@ -578,8 +617,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, DAG.getValueType(N->getOperand(0).getValueType())); if (N->getOpcode() == ISD::ZERO_EXTEND) - return DAG.getZeroExtendInReg(Res, dl, - N->getOperand(0).getValueType().getScalarType()); + return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType()); assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); return Res; } @@ -781,22 +819,51 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { N->getOperand(2)); } +static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl, + unsigned SatW, bool Signed, + const TargetLowering &TLI, + SelectionDAG &DAG) { + EVT VT = V.getValueType(); + unsigned VTW = VT.getScalarSizeInBits(); + + if (!Signed) { + // Saturate to the unsigned maximum by getting the minimum of V and the + // maximum. + return DAG.getNode(ISD::UMIN, dl, VT, V, + DAG.getConstant(APInt::getLowBitsSet(VTW, SatW), + dl, VT)); + } + + // Saturate to the signed maximum (the low SatW - 1 bits) by taking the + // signed minimum of it and V. + V = DAG.getNode(ISD::SMIN, dl, VT, V, + DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1), + dl, VT)); + // Saturate to the signed minimum (the high SatW + 1 bits) by taking the + // signed maximum of it and V. + V = DAG.getNode(ISD::SMAX, dl, VT, V, + DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1), + dl, VT)); + return V; +} + static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS, - unsigned Scale, const TargetLowering &TLI, - SelectionDAG &DAG) { + unsigned Scale, const TargetLowering &TLI, + SelectionDAG &DAG, unsigned SatW = 0) { EVT VT = LHS.getValueType(); - bool Signed = N->getOpcode() == ISD::SDIVFIX; + unsigned VTSize = VT.getScalarSizeInBits(); + bool Signed = N->getOpcode() == ISD::SDIVFIX || + N->getOpcode() == ISD::SDIVFIXSAT; + bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT || + N->getOpcode() == ISD::UDIVFIXSAT; SDLoc dl(N); - // See if we can perform the division in this type without widening. - if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, - DAG)) - return V; - - // If that didn't work, double the type width and try again. That must work, - // or something is wrong. - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), - VT.getScalarSizeInBits() * 2); + // Widen the types by a factor of two. This is guaranteed to expand, since it + // will always have enough high bits in the LHS to shift into. + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2); + if (VT.isVector()) + WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT, + VT.getVectorElementCount()); if (Signed) { LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT); RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT); @@ -805,18 +872,28 @@ static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS, RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT); } - // TODO: Saturation. - SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale, DAG); assert(Res && "Expanding DIVFIX with wide type failed?"); + if (Saturating) { + // If the caller has told us to saturate at something less, use that width + // instead of the type before doubling. However, it cannot be more than + // what we just widened! 
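SaturateWidenedDIVFIX clamps a result that is temporarily held in a wider integer back into SatW bits: unsigned values are UMIN'ed against 2^SatW - 1, signed values are clamped to [-2^(SatW-1), 2^(SatW-1) - 1]. A scalar sketch for SatW = 8 with the value carried in an int32_t (illustrative only):

  #include <cstdint>

  int32_t sat_signed_to_8(int32_t v) {
    if (v > 127) return 127;     // SMIN against 2^(SatW-1) - 1
    if (v < -128) return -128;   // SMAX against -2^(SatW-1)
    return v;
  }

  uint32_t sat_unsigned_to_8(uint32_t v) {
    return v > 255 ? 255 : v;    // UMIN against 2^SatW - 1
  }
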
+ assert(SatW <= VTSize && + "Tried to saturate to more than the original type?"); + Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed, + TLI, DAG); + } return DAG.getZExtOrTrunc(Res, dl, VT); } SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { SDLoc dl(N); SDValue Op1Promoted, Op2Promoted; - bool Signed = N->getOpcode() == ISD::SDIVFIX; + bool Signed = N->getOpcode() == ISD::SDIVFIX || + N->getOpcode() == ISD::SDIVFIXSAT; + bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT || + N->getOpcode() == ISD::UDIVFIXSAT; if (Signed) { Op1Promoted = SExtPromotedInteger(N->getOperand(0)); Op2Promoted = SExtPromotedInteger(N->getOperand(1)); @@ -827,23 +904,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) { EVT PromotedType = Op1Promoted.getValueType(); unsigned Scale = N->getConstantOperandVal(2); - SDValue Res; // If the type is already legal and the operation is legal in that type, we // should not early expand. if (TLI.isTypeLegal(PromotedType)) { TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale); - if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) - Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, - Op2Promoted, N->getOperand(2)); + if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) { + EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + unsigned Diff = PromotedType.getScalarSizeInBits() - + N->getValueType(0).getScalarSizeInBits(); + if (Saturating) + Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, + DAG.getConstant(Diff, dl, ShiftTy)); + SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, + Op2Promoted, N->getOperand(2)); + if (Saturating) + Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res, + DAG.getConstant(Diff, dl, ShiftTy)); + return Res; + } } - if (!Res) - Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG); - - // TODO: Saturation. - - return Res; + // See if we can perform the division in this type without expanding. + if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted, + Op2Promoted, Scale, DAG)) { + if (Saturating) + Res = SaturateWidenedDIVFIX(Res, dl, + N->getValueType(0).getScalarSizeInBits(), + Signed, TLI, DAG); + return Res; + } + // If we cannot, expand it to twice the type width. If we are saturating, give + // it the original width as a saturating width so we don't need to emit + // two saturations. + return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG, + N->getValueType(0).getScalarSizeInBits()); } SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { @@ -1048,8 +1143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { SDValue WideExt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, WideTrunc); // Extract the low NVT subvector. - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, dl, IdxTy); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, dl); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, WideExt, ZeroIdx); } } @@ -1076,7 +1170,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { // Calculate the overflow flag: zero extend the arithmetic result from // the original type. - SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType()); + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); // Overflowed if and only if this is not equal to Res. 
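The overflow bit for a promoted unsigned add/sub is recovered by re-truncating: the arithmetic is done in the wider type, the result is zero-extended in-register from the original width, and any mismatch with the full result means the value did not fit. A sketch for i8 UADDO promoted to i32 (illustrative only):

  #include <cstdint>

  bool uaddo_i8_promoted(uint8_t a, uint8_t b, uint8_t *res) {
    uint32_t wide = (uint32_t)a + (uint32_t)b;   // add in the promoted type
    *res = (uint8_t)wide;
    return (wide & 0xFF) != wide;                // low 8 bits differ from the full sum => carry out
  }
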
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); @@ -1181,6 +1275,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { N->getValueType(0))); } +SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) { + EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); + return DAG.getVScale(SDLoc(N), VT, MulImm.sextOrSelf(VT.getSizeInBits())); +} + SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. @@ -1233,7 +1334,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); SDValue Res = SDValue(); - if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); return false; @@ -1307,7 +1407,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: - case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break; + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1330,10 +1432,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { if (Res.getNode() == N) return true; - assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + const bool IsStrictFp = N->isStrictFPOpcode(); + assert(Res.getValueType() == N->getValueType(0) && + N->getNumValues() == (IsStrictFp ? 2 : 1) && "Invalid operand expansion"); + LLVM_DEBUG(dbgs() << "Replacing: "; N->dump(&DAG); dbgs() << " with: "; + Res.dump()); ReplaceValueWith(SDValue(N, 0), Res); + if (IsStrictFp) + ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1)); + return false; } @@ -1614,7 +1723,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); NewOps[OpNo] = Mask; - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); + if (Res == N) + return SDValue(Res, 0); + + // Update triggered CSE, do our own replacement since caller can't. + ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0)); + ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1)); + return SDValue(); } SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, @@ -1635,7 +1751,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); - return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + SDNode *Res = DAG.UpdateNodeOperands(N, NewOps); + if (Res == N) + return SDValue(Res, 0); + + // Update triggered CSE, do our own replacement since caller can't. 
+ ReplaceValueWith(SDValue(N, 0), SDValue(Res, 0)); + ReplaceValueWith(SDValue(N, 1), SDValue(Res, 1)); + return SDValue(); } SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, @@ -1676,8 +1799,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { SDLoc dl(N); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); - return DAG.getZeroExtendInReg(Op, dl, - N->getOperand(0).getValueType().getScalarType()); + return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType()); } SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) { @@ -1786,6 +1908,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::FREEZE: SplitRes_FREEZE(N, Lo, Hi); break; case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; @@ -1908,7 +2031,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; case ISD::SDIVFIX: - case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break; + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -2666,10 +2791,15 @@ void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, unsigned NBitWidth = NVT.getSizeInBits(); EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT); + Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0)); + SDValue Chain = Lo.getValue(1); // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Chain); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, @@ -2683,6 +2813,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) { + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + Op = GetSoftPromotedHalf(Op); + Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); + } + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -2706,6 +2842,12 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) Op = GetPromotedFloat(Op); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) { + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + Op = GetSoftPromotedHalf(Op); + Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); + } + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -2800,7 +2942,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); ISD::LoadExtType ExtType = N->getExtensionType(); - unsigned Alignment = N->getAlignment(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); @@ -2811,7 +2952,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), MemVT, - Alignment, MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -2833,8 +2974,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } } else if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. - Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), Alignment, MMOFlags, - AAInfo); + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), + N->getOriginalAlign(), MMOFlags, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2845,7 +2986,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -2863,7 +3004,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - Alignment, MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); @@ -2871,7 +3012,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -3226,8 +3367,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo, SDValue &Hi) { - SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1), - N->getConstantOperandVal(2), TLI, DAG); + SDLoc dl(N); + // Try expanding in the existing type first. + SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0), + N->getOperand(1), + N->getConstantOperandVal(2), DAG); + + if (!Res) + Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1), + N->getConstantOperandVal(2), TLI, DAG); SplitInteger(Res, Lo, Hi); } @@ -4071,7 +4219,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); - unsigned Alignment = N->getAlignment(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDLoc dl(N); @@ -4082,15 +4229,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { if (N->getMemoryVT().bitsLE(NVT)) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - N->getMemoryVT(), Alignment, MMOFlags, AAInfo); + N->getMemoryVT(), N->getOriginalAlign(), MMOFlags, + AAInfo); } if (DAG.getDataLayout().isLittleEndian()) { // Little-endian - low bits are at low addresses. GetExpandedInteger(N->getValue(), Lo, Hi); - Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, - AAInfo); + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), + N->getOriginalAlign(), MMOFlags, AAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -4099,9 +4247,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Hi = DAG.getTruncStore( - Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + NEVT, N->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -4129,8 +4277,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { } // Store both the high bits and maybe some of the low bits. - Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, Alignment, - MMOFlags, AAInfo); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), HiVT, + N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. 
Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); @@ -4138,7 +4286,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), - MinAlign(Alignment, IncrementSize), MMOFlags, AAInfo); + N->getOriginalAlign(), MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -4186,18 +4334,43 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); - unsigned OutNumElems = OutVT.getVectorNumElements(); EVT NOutVTElem = NOutVT.getVectorElementType(); SDLoc dl(N); SDValue BaseIdx = N->getOperand(1); + // TODO: We may be able to use this for types other than scalable + // vectors and fix those tests that expect BUILD_VECTOR to be used + if (OutVT.isScalableVector()) { + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + + // Promote operands and see if this is handled by target lowering, + // Otherwise, use the BUILD_VECTOR approach below + if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { + // Collect the (promoted) operands + SDValue Ops[] = { GetPromotedInteger(InOp0), BaseIdx }; + + EVT PromEltVT = Ops[0].getValueType().getVectorElementType(); + assert(PromEltVT.bitsLE(NOutVTElem) && + "Promoted operand has an element type greater than result"); + + EVT ExtVT = NOutVT.changeVectorElementType(PromEltVT); + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), ExtVT, Ops); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext); + } + } + + if (OutVT.isScalableVector()) + report_fatal_error("Unable to promote scalable types using BUILD_VECTOR"); + SDValue InOp0 = N->getOperand(0); if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger) InOp0 = GetPromotedInteger(N->getOperand(0)); EVT InVT = InOp0.getValueType(); + unsigned OutNumElems = OutVT.getVectorNumElements(); SmallVector<SDValue, 8> Ops; Ops.reserve(OutNumElems); for (unsigned i = 0; i != OutNumElems; ++i) { @@ -4319,9 +4492,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { "Unexpected number of elements"); for (unsigned j = 0; j < NumElem; ++j) { - SDValue Ext = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op, - DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Op, + DAG.getVectorIdxConstant(j, dl)); Ops[i * NumElem + j] = DAG.getAnyExtOrTrunc(Ext, dl, OutElemTy); } } @@ -4429,9 +4601,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; i<NumElem; ++i) { // Extract element from incoming vector - SDValue Ex = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming, + DAG.getVectorIdxConstant(i, dl)); SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); NewOps.push_back(Tr); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 63ddb59fce68c..ae087d3bbd8cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -124,6 +124,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { 
Mapped |= 128; if (ResId && PromotedFloats.find(ResId) != PromotedFloats.end()) Mapped |= 256; + if (ResId && SoftPromotedHalfs.find(ResId) != SoftPromotedHalfs.end()) + Mapped |= 512; if (Node.getNodeId() != Processed) { // Since we allow ReplacedValues to map deleted nodes, it may map nodes @@ -168,12 +170,15 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << " WidenedVectors"; if (Mapped & 256) dbgs() << " PromotedFloats"; + if (Mapped & 512) + dbgs() << " SoftPromoteHalfs"; dbgs() << "\n"; llvm_unreachable(nullptr); } } } +#ifndef NDEBUG // Checked that NewNodes are only used by other NewNodes. for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { SDNode *N = NewNodes[i]; @@ -181,6 +186,7 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { UI != UE; ++UI) assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); } +#endif } /// This is the main entry point for the type legalizer. This does a top-down @@ -239,6 +245,9 @@ bool DAGTypeLegalizer::run() { case TargetLowering::TypeLegal: LLVM_DEBUG(dbgs() << "Legal result type\n"); break; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error( + "Scalarization of scalable vectors is not supported."); // The following calls must take care of *all* of the node's results, // not just the illegal result they were passed (this includes results // with a legal type). Results can be remapped using ReplaceValueWith, @@ -276,6 +285,10 @@ bool DAGTypeLegalizer::run() { PromoteFloatResult(N, i); Changed = true; goto NodeDone; + case TargetLowering::TypeSoftPromoteHalf: + SoftPromoteHalfResult(N, i); + Changed = true; + goto NodeDone; } } @@ -297,6 +310,9 @@ ScanOperands: case TargetLowering::TypeLegal: LLVM_DEBUG(dbgs() << "Legal operand\n"); continue; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error( + "Scalarization of scalable vectors is not supported."); // The following calls must either replace all of the node's results // using ReplaceValueWith, and return "false"; or update the node's // operands in place, and return "true". @@ -332,6 +348,10 @@ ScanOperands: NeedsReanalyzing = PromoteFloatOperand(N, i); Changed = true; break; + case TargetLowering::TypeSoftPromoteHalf: + NeedsReanalyzing = SoftPromoteHalfOperand(N, i); + Changed = true; + break; } break; } @@ -719,6 +739,16 @@ void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { OpIdEntry = getTableId(Result); } +void DAGTypeLegalizer::SetSoftPromotedHalf(SDValue Op, SDValue Result) { + assert(Result.getValueType() == MVT::i16 && + "Invalid type for soft-promoted half"); + AnalyzeNewValue(Result); + + auto &OpIdEntry = SoftPromotedHalfs[getTableId(Op)]; + assert((OpIdEntry == 0) && "Node is already promoted!"); + OpIdEntry = getTableId(Result); +} + void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // Note that in some cases vector operation operands may be greater than // the vector element type. 
For example BUILD_VECTOR of type <1 x i1> with @@ -805,9 +835,9 @@ void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi) { assert(Lo.getValueType().getVectorElementType() == - Op.getValueType().getVectorElementType() && - 2*Lo.getValueType().getVectorNumElements() == - Op.getValueType().getVectorNumElements() && + Op.getValueType().getVectorElementType() && + Lo.getValueType().getVectorElementCount() * 2 == + Op.getValueType().getVectorElementCount() && Hi.getValueType() == Lo.getValueType() && "Invalid type for split vector"); // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. @@ -859,12 +889,19 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, SDLoc dl(Op); // Create the stack frame object. Make sure it is aligned for both // the source and destination types. - SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); + + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. + Align DestAlign = DAG.getReducedAlign(DestVT, /*UseABI=*/false); + Align OpAlign = DAG.getReducedAlign(Op.getValueType(), /*UseABI=*/false); + Align Align = std::max(DestAlign, OpAlign); + SDValue StackPtr = + DAG.CreateStackTemporary(Op.getValueType().getStoreSize(), Align); // Emit a store to the stack slot. - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, MachinePointerInfo()); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, + MachinePointerInfo(), Align); // Result is a load from the stack slot. - return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo()); + return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), Align); } /// Replace the node's results with custom code provided by the target and @@ -890,17 +927,6 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // The target didn't want to custom lower it after all. return false; - // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to - // provide the same kind of custom splitting behavior. - if (Results.size() == N->getNumValues() + 1 && LegalizeResult) { - // We've legalized a return type by splitting it. If there is a chain, - // replace that too. - SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]); - if (N->getNumValues() > 1) - ReplaceValueWith(SDValue(N, 1), Results[2]); - return true; - } - // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index faae14444d51c..0fa6d653a8364 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -109,6 +109,10 @@ private: /// supported precision, this map indicates what promoted value to use. SmallDenseMap<TableId, TableId, 8> PromotedFloats; + /// For floating-point nodes that have a smaller precision than the smallest + /// supported precision, this map indicates the converted value to use. + SmallDenseMap<TableId, TableId, 8> SoftPromotedHalfs; + /// For float nodes that need to be expanded this map indicates which operands /// are the expanded version of the input. 
SmallDenseMap<TableId, std::pair<TableId, TableId>, 8> ExpandedFloats; @@ -155,7 +159,9 @@ private: const SDValue &getSDValue(TableId &Id) { RemapId(Id); assert(Id && "TableId should be non-zero"); - return IdToValueMap[Id]; + auto I = IdToValueMap.find(Id); + assert(I != IdToValueMap.end() && "cannot find Id in map"); + return I->second; } public: @@ -172,24 +178,30 @@ public: bool run(); void NoteDeletion(SDNode *Old, SDNode *New) { + assert(Old != New && "node replaced with self"); for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { TableId NewId = getTableId(SDValue(New, i)); TableId OldId = getTableId(SDValue(Old, i)); - if (OldId != NewId) + if (OldId != NewId) { ReplacedValues[OldId] = NewId; - // Delete Node from tables. + // Delete Node from tables. We cannot do this when OldId == NewId, + // because NewId can still have table references to it in + // ReplacedValues. + IdToValueMap.erase(OldId); + PromotedIntegers.erase(OldId); + ExpandedIntegers.erase(OldId); + SoftenedFloats.erase(OldId); + PromotedFloats.erase(OldId); + SoftPromotedHalfs.erase(OldId); + ExpandedFloats.erase(OldId); + ScalarizedVectors.erase(OldId); + SplitVectors.erase(OldId); + WidenedVectors.erase(OldId); + } + ValueToIdMap.erase(SDValue(Old, i)); - IdToValueMap.erase(OldId); - PromotedIntegers.erase(OldId); - ExpandedIntegers.erase(OldId); - SoftenedFloats.erase(OldId); - PromotedFloats.erase(OldId); - ExpandedFloats.erase(OldId); - ScalarizedVectors.erase(OldId); - SplitVectors.erase(OldId); - WidenedVectors.erase(OldId); } } @@ -260,7 +272,7 @@ private: EVT OldVT = Op.getValueType(); SDLoc dl(Op); Op = GetPromotedInteger(Op); - return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); + return DAG.getZeroExtendInReg(Op, dl, OldVT); } // Get a promoted operand and sign or zero extend it to the final size @@ -274,7 +286,7 @@ private: if (TLI.isSExtCheaperThanZExt(OldVT, Op.getValueType())) return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), Op, DAG.getValueType(OldVT)); - return DAG.getZeroExtendInReg(Op, DL, OldVT.getScalarType()); + return DAG.getZeroExtendInReg(Op, DL, OldVT); } // Integer Result Promotion. 
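Note on the getSDValue() hunk above (illustrative, not part of the patch): it swaps DenseMap operator[] for find() plus an assert, so a TableId that was never mapped is caught instead of silently handed a default-constructed SDValue. A minimal standalone sketch of the difference being guarded against; std::map stands in here for the SmallDenseMap used in LegalizeTypes.h, and the names are made up for the example.

#include <cassert>
#include <cstdio>
#include <map>

int main() {
  std::map<int, int> IdToValueMap{{1, 42}};

  // Old pattern: operator[] on a missing key quietly inserts a
  // default-constructed entry and returns it, hiding the bad lookup.
  int Silent = IdToValueMap[2];
  std::printf("operator[]: got %d, map size now %zu\n", Silent,
              IdToValueMap.size()); // got 0, map grew to size 2

  // New pattern: find() keeps the lookup read-only; a missing Id is
  // observable (and asserted on in the patch) instead of papered over.
  auto It = IdToValueMap.find(1);
  assert(It != IdToValueMap.end() && "cannot find Id in map");
  std::printf("find(): got %d, map size still %zu\n", It->second,
              IdToValueMap.size());
  return 0;
}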
@@ -304,6 +316,7 @@ private: SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); + SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); @@ -326,6 +339,7 @@ private: SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_UNDEF(SDNode *N); SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_VSCALE(SDNode *N); SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); @@ -512,9 +526,11 @@ private: SDValue SoftenFloatRes_FP_ROUND(SDNode *N); SDValue SoftenFloatRes_FPOW(SDNode *N); SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREEZE(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); SDValue SoftenFloatRes_FROUND(SDNode *N); + SDValue SoftenFloatRes_FROUNDEVEN(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -584,9 +600,11 @@ private: void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUNDEVEN(SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -651,6 +669,43 @@ private: SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// + // Half soft promotion support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + SDValue GetSoftPromotedHalf(SDValue Op) { + TableId &PromotedId = SoftPromotedHalfs[getTableId(Op)]; + SDValue PromotedOp = getSDValue(PromotedId); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetSoftPromotedHalf(SDValue Op, SDValue Result); + + void SoftPromoteHalfResult(SDNode *N, unsigned ResNo); + SDValue SoftPromoteHalfRes_BinOp(SDNode *N); + SDValue SoftPromoteHalfRes_BITCAST(SDNode *N); + SDValue SoftPromoteHalfRes_ConstantFP(SDNode *N); + SDValue SoftPromoteHalfRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N); + SDValue SoftPromoteHalfRes_FMAD(SDNode *N); + SDValue SoftPromoteHalfRes_FPOWI(SDNode *N); + SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N); + SDValue SoftPromoteHalfRes_LOAD(SDNode *N); + SDValue SoftPromoteHalfRes_SELECT(SDNode *N); + SDValue SoftPromoteHalfRes_SELECT_CC(SDNode *N); + SDValue SoftPromoteHalfRes_UnaryOp(SDNode *N); + SDValue SoftPromoteHalfRes_XINT_TO_FP(SDNode *N); + SDValue SoftPromoteHalfRes_UNDEF(SDNode *N); + + bool SoftPromoteHalfOperand(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_BITCAST(SDNode *N); + SDValue SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_FP_EXTEND(SDNode *N); + SDValue 
SoftPromoteHalfOp_FP_TO_XINT(SDNode *N); + SDValue SoftPromoteHalfOp_SETCC(SDNode *N); + SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp //===--------------------------------------------------------------------===// @@ -721,6 +776,11 @@ private: void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + // Helper function for incrementing the pointer when splitting + // memory operations + void IncrementPointer(MemSDNode *N, EVT MemVT, + MachinePointerInfo &MPI, SDValue &Ptr); + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. void SplitVectorResult(SDNode *N, unsigned ResNo); void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -918,6 +978,7 @@ private: void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVSETCC(const SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c45c62cabc05b..9cd3b8f76d6ca 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -50,6 +50,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteInteger: break; case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeSoftPromoteHalf: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); case TargetLowering::TypeSoftenFloat: @@ -82,6 +83,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error("Scalarization of scalable vectors is not supported."); case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); @@ -119,9 +122,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector<SDValue, 8> Vals; for (unsigned i = 0; i < NumElems; ++i) - Vals.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, + CastInOp, DAG.getVectorIdxConstant(i, dl))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -154,9 +156,13 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Create the stack frame object. Make sure it is aligned for both // the source and expanded destination types. - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment( - NOutVT.getTypeForEVT(*DAG.getContext())); - SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. 
+ Align InAlign = DAG.getReducedAlign(InVT, /*UseABI=*/false); + Align NOutAlign = DAG.getReducedAlign(NOutVT, /*UseABI=*/false); + Align Align = std::max(InAlign, NOutAlign); + SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Align); int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); @@ -165,7 +171,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, NOutAlign); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -173,8 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize)); + PtrInfo.getWithOffset(IncrementSize), NOutAlign); // Handle endianness of the load. if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) @@ -251,21 +256,20 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - unsigned Alignment = LD->getAlignment(); AAMDNodes AAInfo = LD->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); - Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), Alignment, - LD->getMemOperand()->getFlags(), AAInfo); + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), + AAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getMemBasePlusOffset(Ptr, IncrementSize, dl); - Hi = DAG.getLoad(NVT, dl, Chain, Ptr, - LD->getPointerInfo().getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), AAInfo); + Hi = DAG.getLoad( + NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo); // Build a factor node to remember that this load is independent of the // other one. 
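Side note on the expansion hunks above (illustrative, not part of the patch): they only change which alignment is attached to the split halves; the Lo/Hi layout convention stays the same, with the low half at the lower address and the parts swapped when the target has big-endian part ordering. A small standalone illustration of that layout on the host, with an arbitrary example value:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  uint64_t Whole = 0x1122334455667788ULL;
  uint32_t Halves[2];
  std::memcpy(Halves, &Whole, sizeof(Whole));
  // Little-endian host: Halves[0] == 0x55667788, low bits at the low address.
  // Big-endian host:    Halves[0] == 0x11223344, so Lo/Hi must be swapped.
  std::printf("first half in memory: 0x%08x\n", (unsigned)Halves[0]);
  return 0;
}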
@@ -462,7 +466,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); - unsigned Alignment = St->getAlignment(); AAMDNodes AAInfo = St->getAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -474,14 +477,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) std::swap(Lo, Hi); - Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), Alignment, - St->getMemOperand()->getFlags(), AAInfo); + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), + St->getOriginalAlign(), St->getMemOperand()->getFlags(), + AAInfo); Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Hi = DAG.getStore(Chain, dl, Hi, Ptr, - St->getPointerInfo().getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize), - St->getMemOperand()->getFlags(), AAInfo); + Hi = DAG.getStore( + Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), + St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -558,3 +561,12 @@ void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } + +void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue L, H; + SDLoc dl(N); + GetSplitOp(N->getOperand(0), L, H); + + Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L); + Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 7d0b1ee6ae073..6409f924920dd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -142,9 +142,10 @@ class VectorLegalizer { void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); - SDValue ExpandFixedPointDiv(SDNode *Node); + void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results); SDValue ExpandStrictFPOp(SDNode *Node); void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -182,9 +183,7 @@ bool VectorLegalizer::Run() { E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { // Check if the values of the nodes contain vectors. We don't need to check // the operands because we are going to check their values at some point. - for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); - J != E; ++J) - HasVectors |= J->isVector(); + HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); }); // If we found a vector node we can start the legalization. 
if (HasVectors) @@ -318,12 +317,10 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { } } - bool HasVectorValueOrOp = false; - for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J) - HasVectorValueOrOp |= J->isVector(); - for (const SDValue &Oper : Node->op_values()) - HasVectorValueOrOp |= Oper.getValueType().isVector(); - + bool HasVectorValueOrOp = + llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) || + llvm::any_of(Node->op_values(), + [](SDValue O) { return O.getValueType().isVector(); }); if (!HasVectorValueOrOp) return TranslateLegalizeResults(Op, Node); @@ -339,7 +336,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Expand; break; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" ValVT = Node->getValueType(0); @@ -431,6 +428,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: @@ -463,7 +461,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: - case ISD::UDIVFIX: { + case ISD::SDIVFIXSAT: + case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -704,132 +704,7 @@ void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); - - EVT SrcVT = LD->getMemoryVT(); - EVT SrcEltVT = SrcVT.getScalarType(); - unsigned NumElem = SrcVT.getVectorNumElements(); - - SDValue NewChain; - SDValue Value; - if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { - SDLoc dl(N); - - SmallVector<SDValue, 8> Vals; - SmallVector<SDValue, 8> LoadChains; - - EVT DstEltVT = LD->getValueType(0).getScalarType(); - SDValue Chain = LD->getChain(); - SDValue BasePTR = LD->getBasePtr(); - ISD::LoadExtType ExtType = LD->getExtensionType(); - - // When elements in a vector is not byte-addressable, we cannot directly - // load each element by advancing pointer, which could only address bytes. - // Instead, we load all significant words, mask bits off, and concatenate - // them to form each element. Finally, they are extended to destination - // scalar type to build the destination vector. - EVT WideVT = TLI.getPointerTy(DAG.getDataLayout()); - - assert(WideVT.isRound() && - "Could not handle the sophisticated case when the widest integer is" - " not power of 2."); - assert(WideVT.bitsGE(SrcEltVT) && - "Type is not legalized?"); - - unsigned WideBytes = WideVT.getStoreSize(); - unsigned Offset = 0; - unsigned RemainingBytes = SrcVT.getStoreSize(); - SmallVector<SDValue, 8> LoadVals; - while (RemainingBytes > 0) { - SDValue ScalarLoad; - unsigned LoadBytes = WideBytes; - - if (RemainingBytes >= LoadBytes) { - ScalarLoad = - DAG.getLoad(WideVT, dl, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), - MinAlign(LD->getAlignment(), Offset), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); - } else { - EVT LoadVT = WideVT; - while (RemainingBytes < LoadBytes) { - LoadBytes >>= 1; // Reduce the load size by half. 
- LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); - } - ScalarLoad = - DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, - LD->getPointerInfo().getWithOffset(Offset), LoadVT, - MinAlign(LD->getAlignment(), Offset), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); - } - - RemainingBytes -= LoadBytes; - Offset += LoadBytes; - - BasePTR = DAG.getObjectPtrOffset(dl, BasePTR, LoadBytes); - - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); - } - - unsigned BitOffset = 0; - unsigned WideIdx = 0; - unsigned WideBits = WideVT.getSizeInBits(); - - // Extract bits, pack and extend/trunc them into destination type. - unsigned SrcEltBits = SrcEltVT.getSizeInBits(); - SDValue SrcEltBitMask = DAG.getConstant( - APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT); - - for (unsigned Idx = 0; Idx != NumElem; ++Idx) { - assert(BitOffset < WideBits && "Unexpected offset!"); - - SDValue ShAmt = DAG.getConstant( - BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); - - BitOffset += SrcEltBits; - if (BitOffset >= WideBits) { - WideIdx++; - BitOffset -= WideBits; - if (BitOffset > 0) { - ShAmt = DAG.getConstant( - SrcEltBits - BitOffset, dl, - TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - SDValue Hi = - DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); - Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); - } - } - - Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); - - switch (ExtType) { - default: llvm_unreachable("Unknown extended-load op!"); - case ISD::EXTLOAD: - Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); - break; - case ISD::ZEXTLOAD: - Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); - break; - case ISD::SEXTLOAD: - ShAmt = - DAG.getConstant(WideBits - SrcEltBits, dl, - TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); - Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); - Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); - Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); - break; - } - Vals.push_back(Lo); - } - - NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); - Value = DAG.getBuildVector(N->getValueType(0), dl, Vals); - } else { - std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); - } - - return std::make_pair(Value, NewChain); + return TLI.scalarizeVectorLoad(LD, DAG); } SDValue VectorLegalizer::ExpandStore(SDNode *N) { @@ -968,9 +843,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { break; case ISD::SDIVFIX: case ISD::UDIVFIX: - Results.push_back(ExpandFixedPointDiv(Node)); + ExpandFixedPointDiv(Node, Results); return; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ + case ISD::SDIVFIXSAT: + case ISD::UDIVFIXSAT: + break; +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" ExpandStrictFPOp(Node, Results); @@ -990,6 +868,10 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::VECREDUCE_FMIN: Results.push_back(TLI.expandVecReduce(Node, DAG)); return; + case ISD::SREM: + case ISD::UREM: + ExpandREM(Node, Results); + return; } Results.push_back(DAG.UnrollVectorOp(Node)); @@ -1087,9 +969,8 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NumSrcElements); - Src = 
DAG.getNode( - ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), + Src, DAG.getVectorIdxConstant(0, DL)); } // Build a base mask of undef shuffles. @@ -1147,9 +1028,8 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), NumSrcElements); - Src = DAG.getNode( - ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), + Src, DAG.getVectorIdxConstant(0, DL)); } // Build up a zero vector to blend into this one. @@ -1456,12 +1336,12 @@ void VectorLegalizer::ExpandMULO(SDNode *Node, Results.push_back(Overflow); } -SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) { +void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { SDNode *N = Node; if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N), N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG)) - return Expanded; - return DAG.UnrollVectorOp(N); + Results.push_back(Expanded); } void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, @@ -1478,6 +1358,17 @@ void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, UnrollStrictFPOp(Node, Results); } +void VectorLegalizer::ExpandREM(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) && + "Expected REM node"); + + SDValue Result; + if (!TLI.expandREM(Node, Result, DAG)) + Result = DAG.UnrollVectorOp(Node); + Results.push_back(Result); +} + void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results) { EVT VT = Node->getValueType(0); @@ -1500,8 +1391,7 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, SmallVector<SDValue, 32> OpChains; for (unsigned i = 0; i < NumElems; ++i) { SmallVector<SDValue, 4> Opers; - SDValue Idx = DAG.getConstant(i, dl, - TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue Idx = DAG.getVectorIdxConstant(i, dl); // The Chain is the first operand. 
Opers.push_back(Chain); @@ -1551,12 +1441,10 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { SDLoc dl(Node); SmallVector<SDValue, 8> Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { - SDValue LHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue RHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getVectorIdxConstant(i, dl)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getVectorIdxConstant(i, dl)); Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TmpEltVT), diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 974914d00d052..414ba25ffd5ff 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -20,10 +20,11 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/TypeSize.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -88,11 +89,13 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FREEZE: case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -147,7 +150,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { R = ScalarizeVecRes_TernaryOp(N); break; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" R = ScalarizeVecRes_StrictFPOp(N); @@ -166,7 +169,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: + case ISD::SDIVFIXSAT: case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: R = ScalarizeVecRes_FIX(N); break; } @@ -187,8 +192,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - Op0.getValueType(), Op0, Op1, Op2); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, + Op2, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { @@ -196,7 +201,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = N->getOperand(2); return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, - Op2); + Op2, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { @@ -221,7 +226,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { Opers[i] = Oper; } - SDValue Result = DAG.getNode(N->getOpcode(), dl, ValueVTs, Opers); + SDValue Result = DAG.getNode(N->getOpcode(), dl, 
DAG.getVTList(ValueVTs), + Opers, N->getFlags()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -251,6 +257,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N, ResVT.getVectorElementType(), OvVT.getVectorElementType()); SDNode *ScalarNode = DAG.getNode( N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode(); + ScalarNode->setFlags(N->getFlags()); // Replace the other vector result not being explicitly scalarized here. unsigned OtherNo = 1 - ResNo; @@ -331,8 +338,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), - N->getOriginalAlignment(), N->getMemOperand()->getFlags(), - N->getAAInfo()); + N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); // Legalize the chain result - switch anything that used the old chain to // use the new one. @@ -357,11 +363,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { Op = GetScalarizedVector(Op); } else { EVT VT = OpVT.getVectorElementType(); - Op = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getVectorIdxConstant(0, DL)); } - return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags()); } SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { @@ -383,9 +388,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) { if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { Op = GetScalarizedVector(Op); } else { - Op = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op, + DAG.getVectorIdxConstant(0, DL)); } switch (N->getOpcode()) { @@ -421,9 +425,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { Cond = GetScalarizedVector(Cond); } else { EVT VT = OpVT.getVectorElementType(); - Cond = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Cond = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond, + DAG.getVectorIdxConstant(0, DL)); } SDValue LHS = GetScalarizedVector(N->getOperand(1)); @@ -523,12 +526,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { RHS = GetScalarizedVector(RHS); } else { EVT VT = OpVT.getVectorElementType(); - LHS = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); - RHS = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getVectorIdxConstant(0, DL)); + RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getVectorIdxConstant(0, DL)); } // Turn it into a scalar SETCC. 
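Several hunks above replace the open-coded DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())) pattern with DAG.getVectorIdxConstant(i, dl). A sketch of what that helper is assumed to boil down to, written directly from the expression it replaces; the real definition lives in SelectionDAG itself, not in this diff, and the function name below is invented for illustration.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Equivalent of the expression being replaced throughout the hunks above:
// a constant of the target's preferred vector-index type.
static SDValue getVectorIdxConstantSketch(SelectionDAG &DAG, uint64_t Idx,
                                          const SDLoc &DL) {
  EVT IdxVT = DAG.getTargetLoweringInfo().getVectorIdxTy(DAG.getDataLayout());
  return DAG.getConstant(Idx, DL, IdxVT);
}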
@@ -749,12 +750,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ return DAG.getTruncStore( N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), - N->getMemoryVT().getVectorElementType(), N->getAlignment(), + N->getMemoryVT().getVectorElementType(), N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), - N->getOriginalAlignment(), N->getMemOperand()->getFlags(), + N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); } @@ -881,12 +882,14 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FLOG2: case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FREEZE: case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -942,7 +945,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_TernaryOp(N, Lo, Hi); break; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" SplitVecRes_StrictFPOp(N, Lo, Hi); @@ -961,7 +964,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UMULFIX: case ISD::UMULFIXSAT: case ISD::SDIVFIX: + case ISD::SDIVFIXSAT: case ISD::UDIVFIX: + case ISD::UDIVFIXSAT: SplitVecRes_FIX(N, Lo, Hi); break; } @@ -971,6 +976,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SetSplitVector(SDValue(N, ResNo), Lo, Hi); } +void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, + MachinePointerInfo &MPI, + SDValue &Ptr) { + SDLoc DL(N); + unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8; + + if (MemVT.isScalableVector()) { + SDValue BytesIncrement = DAG.getVScale( + DL, Ptr.getValueType(), + APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize)); + MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement); + } else { + MPI = N->getPointerInfo().getWithOffset(IncrementSize); + // Increment the pointer to the other half. 
+ Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); + } +} + void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; @@ -995,10 +1019,10 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); SDLoc dl(N); - Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), - Op0Lo, Op1Lo, Op2Lo); - Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), - Op0Hi, Op1Hi, Op2Hi); + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, + Op2Lo, N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, + Op2Hi, N->getFlags()); } void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { @@ -1010,8 +1034,10 @@ void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Op2 = N->getOperand(2); unsigned Opcode = N->getOpcode(); - Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2); - Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2, + N->getFlags()); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2, + N->getFlags()); } void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, @@ -1030,6 +1056,7 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, case TargetLowering::TypeLegal: case TargetLowering::TypePromoteInteger: case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeSoftPromoteHalf: case TargetLowering::TypeSoftenFloat: case TargetLowering::TypeScalarizeVector: case TargetLowering::TypeWidenVector: @@ -1055,6 +1082,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); return; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error("Scalarization of scalable vectors is not supported."); } // In the general case, convert the input to an integer and split it by hand. @@ -1116,9 +1145,9 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + Hi = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorNumElements(), dl)); } void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, @@ -1137,40 +1166,45 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, // boundary between the halves, we can avoid spilling the vector, and insert // into the lower half of the split vector directly. // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever - // the index is constant and there is no boundary crossing. But those cases - // don't seem to get hit in practice. - if (ConstantSDNode *ConstIdx = dyn_cast<ConstantSDNode>(Idx)) { - unsigned IdxVal = ConstIdx->getZExtValue(); - if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) { - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); - return; - } + // there is no boundary crossing. But those cases don't seem to get hit in + // practice. 
+ unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) { + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); + return; } // Spill the vector to the stack. - SDValue StackPtr = DAG.CreateStackTemporary(VecVT); - SDValue Store = - DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo()); + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. + Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); // Store the new subvector into the specified index. SDValue SubVecPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo()); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, + MachinePointerInfo::getUnknownStack(MF)); // Load the Lo part from the stack slot. - Lo = - DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo()); + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo, + SmallestAlign); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueSizeInBits() / 8; StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the Hi part from the stack slot. - Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - MinAlign(Alignment, IncrementSize)); + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), SmallestAlign); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, @@ -1291,8 +1325,10 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, EVT LoValueVTs[] = {LoVT, MVT::Other}; EVT HiValueVTs[] = {HiVT, MVT::Other}; - Lo = DAG.getNode(N->getOpcode(), dl, LoValueVTs, OpsLo); - Hi = DAG.getNode(N->getOpcode(), dl, HiValueVTs, OpsHi); + Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo, + N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi, + N->getFlags()); // Build a factor node to remember that this Op is independent of the // other one. 
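A note on the IncrementPointer() helper added earlier in this file's hunks (illustrative, not part of the patch): for scalable vectors the byte offset of the hi half is only known as a multiple of vscale, which is why the helper emits an ISD::VSCALE-based pointer add and keeps only the address space in the MachinePointerInfo rather than a fixed offset. A standalone sketch of the arithmetic for a <vscale x 4 x i32> load split into two <vscale x 2 x i32> halves, with vscale values picked arbitrarily for illustration:

#include <cstdio>

int main() {
  // Known minimum size of the lo half, <vscale x 2 x i32>, in bytes.
  const unsigned LoKnownMinBytes = 2 * 32 / 8;
  // The real offset of the hi half is vscale * that minimum, so it cannot be
  // folded into a compile-time MachinePointerInfo offset.
  const unsigned VScales[] = {1, 2, 4};
  for (unsigned VScale : VScales)
    std::printf("vscale=%u -> hi half starts at byte offset %u\n", VScale,
                VScale * LoKnownMinBytes);
  return 0;
}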
@@ -1332,10 +1368,8 @@ SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { EVT OperandEltVT = OperandVT.getVectorElementType(); - Operands[j] = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - DAG.getConstant(i, dl, TLI.getVectorIdxTy( - DAG.getDataLayout()))); + Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, + Operand, DAG.getVectorIdxConstant(i, dl)); } else { Operands[j] = Operand; } @@ -1384,6 +1418,8 @@ void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT); SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode(); SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode(); + LoNode->setFlags(N->getFlags()); + HiNode->setFlags(N->getFlags()); Lo = SDValue(LoNode, ResNo); Hi = SDValue(HiNode, ResNo); @@ -1417,10 +1453,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Lo.getValueType(), Lo, Elt, Idx); else - Hi = - DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getConstant(IdxVal - LoNumElts, dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl)); return; } @@ -1442,36 +1476,38 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, } // Spill the vector to the stack. - SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. + Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); auto &MF = DAG.getMachineFunction(); auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); // Store the new element. This may be larger than the vector element type, // so use a truncating store. SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); - Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); - unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); - Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, - MachinePointerInfo::getUnknownStack(MF), EltVT); + Store = DAG.getTruncStore( + Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT, + commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8)); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); // Load the Lo part from the stack slot. - Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo); + Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign); // Increment the pointer to the other part. unsigned IncrementSize = LoVT.getSizeInBits() / 8; - StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getConstant(IncrementSize, dl, - StackPtr.getValueType())); + StackPtr = DAG.getMemBasePlusOffset(StackPtr, IncrementSize, dl); // Load the Hi part from the stack slot. 
Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, - PtrInfo.getWithOffset(IncrementSize), - MinAlign(Alignment, IncrementSize)); + PtrInfo.getWithOffset(IncrementSize), SmallestAlign); // If we adjusted the original type, we need to truncate the results. std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); @@ -1502,21 +1538,29 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue Ptr = LD->getBasePtr(); SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); EVT MemoryVT = LD->getMemoryVT(); - unsigned Alignment = LD->getOriginalAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); EVT LoMemVT, HiMemVT; std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) { + SDValue Value, NewChain; + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); + std::tie(Lo, Hi) = DAG.SplitVector(Value, dl); + ReplaceValueWith(SDValue(LD, 1), NewChain); + return; + } + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); + LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(), + MMOFlags, AAInfo); + + MachinePointerInfo MPI; + IncrementPointer(LD, LoMemVT, MPI, Ptr); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, - LD->getPointerInfo().getWithOffset(IncrementSize), HiMemVT, - Alignment, MMOFlags, AAInfo); + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI, + HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1541,7 +1585,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, assert(Offset.isUndef() && "Unexpected indexed masked load offset"); SDValue Mask = MLD->getMask(); SDValue PassThru = MLD->getPassThru(); - unsigned Alignment = MLD->getOriginalAlignment(); + Align Alignment = MLD->getOriginalAlign(); ISD::LoadExtType ExtType = MLD->getExtensionType(); // Split Mask operand @@ -1557,7 +1601,9 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty); SDValue PassThruLo, PassThruHi; if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) @@ -1565,27 +1611,33 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, else std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MLD->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MLD->getAAInfo(), MLD->getRanges()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo(), MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, MMO, MLD->getAddressingMode(), ExtType, MLD->isExpandingLoad()); - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, - MLD->isExpandingLoad()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad, - HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(), - MLD->getRanges()); - - Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, HiMemVT, - MMO, MLD->getAddressingMode(), ExtType, - MLD->isExpandingLoad()); + if (HiIsEmpty) { + // The hi masked load has zero storage size. We therefore simply set it to + // the low masked load and rely on subsequent removal from the chain. + Hi = Lo; + } else { + // Generate hi masked load. + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, + MLD->isExpandingLoad()); + unsigned HiOffset = LoMemVT.getStoreSize(); + + MMO = DAG.getMachineFunction().getMachineMemOperand( + MLD->getPointerInfo().getWithOffset(HiOffset), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment, + MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, + HiMemVT, MMO, MLD->getAddressingMode(), ExtType, + MLD->isExpandingLoad()); + } // Build a factor node to remember that this load is independent of the // other one. @@ -1610,7 +1662,7 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue PassThru = MGT->getPassThru(); SDValue Index = MGT->getIndex(); SDValue Scale = MGT->getScale(); - unsigned Alignment = MGT->getOriginalAlignment(); + Align Alignment = MGT->getOriginalAlign(); // Split Mask operand SDValue MaskLo, MaskHi; @@ -1623,11 +1675,6 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); } - EVT MemoryVT = MGT->getMemoryVT(); - EVT LoMemVT, HiMemVT; - // Split MemoryVT - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue PassThruLo, PassThruHi; if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(PassThru, PassThruLo, PassThruHi); @@ -1640,10 +1687,10 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), MGT->getRanges()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MGT->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(), + MGT->getRanges()); SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, @@ -1708,11 +1755,13 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo); if (N->getOpcode() == ISD::FP_ROUND) { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1), + N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1), + N->getFlags()); } else { - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getFlags()); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getFlags()); } } @@ -1737,8 +1786,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, // more effectively move in the right direction and prevent falling down // to scalarization in many cases due to the input vector being split too // far. - unsigned NumElements = SrcVT.getVectorNumElements(); - if ((NumElements & 1) == 0 && + if ((SrcVT.getVectorMinNumElements() & 1) == 0 && SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { LLVMContext &Ctx = *DAG.getContext(); EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx); @@ -1851,9 +1899,9 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, Idx -= Input * NewElts; // Extract the vector element by hand. - SVOps.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input], - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, + Inputs[Input], + DAG.getVectorIdxConstant(Idx, dl))); } // Construct the Lo/Hi output using a BUILD_VECTOR. @@ -1882,11 +1930,11 @@ void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue SV = N->getOperand(2); SDLoc dl(N); - const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment( - NVT.getTypeForEVT(*DAG.getContext())); + const Align Alignment = + DAG.getDataLayout().getABITypeAlign(NVT.getTypeForEVT(*DAG.getContext())); - Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment); - Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment); + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment.value()); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment.value()); Chain = Hi.getValue(1); // Modified the chain - switch anything that used the old chain to use @@ -2160,8 +2208,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); } else { return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, - DAG.getConstant(IdxVal - LoElts, dl, - Idx.getValueType())); + DAG.getVectorIdxConstant(IdxVal - LoElts, dl)); } } @@ -2200,11 +2247,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { } // Store the vector to the stack. 
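The extract-element path that follows (like the insert-element and insert-subvector paths above) goes through memory when the index is not a constant: the whole vector is spilled to a stack temporary, the element's address is derived from the index, and only that element is loaded back. A standalone analogue using an ordinary buffer; the types, sizes and chosen lane are illustrative only:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  std::uint32_t Vec[8] = {10, 11, 12, 13, 14, 15, 16, 17};
  unsigned Idx = 5;                      // lane to extract

  unsigned char Slot[sizeof(Vec)];       // stack "temporary"
  std::memcpy(Slot, Vec, sizeof(Vec));   // spill the whole vector

  std::uint32_t Elt;                     // reload just the requested element
  std::memcpy(&Elt, Slot + Idx * sizeof(std::uint32_t), sizeof(Elt));
  std::printf("element %u = %u\n", Idx, Elt);
}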
- SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + // In cases where the vector is illegal it will be broken down into parts + // and stored in parts - we should use the alignment for the smallest part. + Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); auto &MF = DAG.getMachineFunction(); auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); - SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); // Load back the required element. StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); @@ -2219,7 +2271,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getExtLoad( ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, - MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT); + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT, + commonAlignment(SmallestAlign, EltVT.getSizeInBits() / 8)); } SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { @@ -2244,7 +2297,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue Scale = MGT->getScale(); SDValue Mask = MGT->getMask(); SDValue PassThru = MGT->getPassThru(); - unsigned Alignment = MGT->getOriginalAlignment(); + Align Alignment = MGT->getOriginalAlign(); SDValue MaskLo, MaskHi; if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) @@ -2269,21 +2322,15 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, else std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), MGT->getRanges()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MGT->getPointerInfo(), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(), + MGT->getRanges()); SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, MMO, MGT->getIndexType()); - MMO = DAG.getMachineFunction(). - getMachineMemOperand(MGT->getPointerInfo(), - MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), - Alignment, MGT->getAAInfo(), - MGT->getRanges()); - SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, MMO, MGT->getIndexType()); @@ -2312,13 +2359,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, assert(Offset.isUndef() && "Unexpected indexed masked store offset"); SDValue Mask = N->getMask(); SDValue Data = N->getValue(); - EVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getOriginalAlignment(); + Align Alignment = N->getOriginalAlign(); SDLoc DL(N); - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - SDValue DataLo, DataHi; if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) // Split Data operand @@ -2337,32 +2380,45 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); } - SDValue Lo, Hi; - MachineMemOperand *MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); + EVT MemoryVT = N->getMemoryVT(); + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); + + SDValue Lo, Hi, Res; + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo(), MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - N->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); + if (HiIsEmpty) { + // The hi masked store has zero storage size. + // Only the lo masked store is needed. + Res = Lo; + } else { - MMO = DAG.getMachineFunction().getMachineMemOperand( - N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, - HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), - N->getRanges()); + Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, + N->isCompressingStore()); + unsigned HiOffset = LoMemVT.getStoreSize(); - Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, - N->getAddressingMode(), N->isTruncatingStore(), - N->isCompressingStore()); + MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore, + HiMemVT.getStoreSize(), Alignment, N->getAAInfo(), N->getRanges()); - // Build a factor node to remember that this store is independent of the - // other one. - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, + N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); + + // Build a factor node to remember that this store is independent of the + // other one. + Res = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } + + return Res; } SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, @@ -2373,13 +2429,10 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue Index = N->getIndex(); SDValue Scale = N->getScale(); SDValue Data = N->getValue(); - EVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getOriginalAlignment(); + Align Alignment = N->getOriginalAlign(); SDLoc DL(N); // Split all operands - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); SDValue DataLo, DataHi; if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) @@ -2406,20 +2459,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); SDValue Lo; - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + N->getPointerInfo(), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges()); SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), DL, OpsLo, MMO, N->getIndexType()); - MMO = DAG.getMachineFunction(). 
- getMachineMemOperand(N->getPointerInfo(), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), - Alignment, N->getAAInfo(), N->getRanges()); - // The order of the Scatter operation after split is well defined. The "Hi" // part comes after the "Lo". So these two operations should be chained one // after another. @@ -2437,7 +2484,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { SDValue Ch = N->getChain(); SDValue Ptr = N->getBasePtr(); EVT MemoryVT = N->getMemoryVT(); - unsigned Alignment = N->getOriginalAlignment(); + Align Alignment = N->getOriginalAlign(); MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); AAMDNodes AAInfo = N->getAAInfo(); SDValue Lo, Hi; @@ -2450,8 +2497,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) return TLI.scalarizeVectorStore(N, DAG); - unsigned IncrementSize = LoMemVT.getSizeInBits()/8; - if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo); @@ -2459,17 +2504,14 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, AAInfo); - // Increment the pointer to the other half. - Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize); + MachinePointerInfo MPI; + IncrementPointer(N, LoMemVT, MPI, Ptr); if (isTruncating) - Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, - N->getPointerInfo().getWithOffset(IncrementSize), + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI, HiMemVT, Alignment, MMOFlags, AAInfo); else - Hi = DAG.getStore(Ch, DL, Hi, Ptr, - N->getPointerInfo().getWithOffset(IncrementSize), - Alignment, MMOFlags, AAInfo); + Hi = DAG.getStore(Ch, DL, Hi, Ptr, MPI, Alignment, MMOFlags, AAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } @@ -2487,9 +2529,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (const SDValue &Op : N->op_values()) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { - Elts.push_back(DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); + Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, + DAG.getVectorIdxConstant(i, DL))); } } @@ -2565,9 +2606,9 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { SDValue Chain; if (N->isStrictFPOpcode()) { HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, - {N->getOperand(0), HalfLo}); + {N->getOperand(0), InLoVec}); HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other}, - {N->getOperand(0), HalfHi}); + {N->getOperand(0), InHiVec}); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
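In the masked load and masked store hunks above, the memory type is now split with GetDependentSplitDestVTs so that the low half lines up with the already-split data; when the low half already covers everything touched in memory, the high half has zero storage size and the second masked operation is simply dropped (Hi = Lo, or only the lo store is kept). A very rough arithmetic sketch, assuming the split just assigns whatever remains to the high half; that assumption is for illustration and is not a statement about the helper's exact contract:

#include <cstdio>

int main() {
  unsigned MemElts = 2;     // elements actually read/written in memory
  unsigned LoDataElts = 2;  // elements covered by the low half of the data

  unsigned LoMemElts = MemElts < LoDataElts ? MemElts : LoDataElts;
  unsigned HiMemElts = MemElts - LoMemElts;  // 0 => the hi operation is dropped
  std::printf("lo covers %u elements, hi covers %u%s\n", LoMemElts, HiMemElts,
              HiMemElts == 0 ? " (hi is empty)" : "");
}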
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1), @@ -2611,9 +2652,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { SDLoc DL(N); GetSplitVector(N->getOperand(0), Lo0, Hi0); GetSplitVector(N->getOperand(1), Lo1, Hi1); - unsigned PartElements = Lo0.getValueType().getVectorNumElements(); - EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); - EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); + auto PartEltCnt = Lo0.getValueType().getVectorElementCount(); + + LLVMContext &Context = *DAG.getContext(); + EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt); + EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2); LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); @@ -2753,7 +2796,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryWithExtraScalarOp(N); break; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: #include "llvm/IR/ConstrainedOps.def" Res = WidenVecRes_StrictFP(N); @@ -2813,6 +2856,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FRINT: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: { @@ -2842,6 +2886,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::FNEG: + case ISD::FREEZE: case ISD::FCANONICALIZE: Res = WidenVecRes_Unary(N); break; @@ -2924,9 +2969,8 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, SDValue VecOp = DAG.getUNDEF(NextVT); unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { - VecOp = DAG.getNode( - ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getVectorIdxConstant(i, dl)); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -3008,12 +3052,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { - SDValue EOp1 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue EOp2 = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getVectorIdxConstant(Idx, dl)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getVectorIdxConstant(Idx, dl)); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); Idx += NumElts; CurNumElts -= NumElts; @@ -3025,12 +3067,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { - SDValue EOp1 = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue EOp2 = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 
dl, WidenEltVT, + InOp1, DAG.getVectorIdxConstant(Idx, dl)); + SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, + InOp2, DAG.getVectorIdxConstant(Idx, dl)); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2, Flags); } @@ -3108,14 +3148,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SmallVector<SDValue, 4> EOps; - + for (unsigned i = 0; i < NumOpers; ++i) { SDValue Op = InOps[i]; - - if (Op.getValueType().isVector()) - Op = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, Op, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + if (Op.getValueType().isVector()) + Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Op, + DAG.getVectorIdxConstant(Idx, dl)); EOps.push_back(Op); } @@ -3140,10 +3179,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { SDValue Op = InOps[i]; if (Op.getValueType().isVector()) - Op = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op, - DAG.getConstant(Idx, dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, Op, + DAG.getVectorIdxConstant(Idx, dl)); EOps.push_back(Op); } @@ -3190,8 +3227,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { *DAG.getContext(), ResVT.getVectorElementType(), WideOvVT.getVectorNumElements()); - SDValue Zero = DAG.getConstant( - 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue Zero = DAG.getVectorIdxConstant(0, DL); WideLHS = DAG.getNode( ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT), N->getOperand(0), Zero); @@ -3210,8 +3246,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) { SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo)); } else { - SDValue Zero = DAG.getConstant( - 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue Zero = DAG.getVectorIdxConstant(0, DL); SDValue OtherVal = DAG.getNode( ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero); ReplaceValueWith(SDValue(N, OtherNo), OtherVal); @@ -3274,9 +3309,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } if (InVTNumElts % WidenNumElts == 0) { - SDValue InVal = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, + DAG.getVectorIdxConstant(0, DL)); // Extract the input and convert the shorten input vector. if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -3291,9 +3325,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // necessary. 
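The widening paths here fall back to unrolling when the operation cannot be performed on a whole legal vector: each of the original elements is extracted, the scalar operation is applied, and the results are rebuilt into the widened vector, while the padding lanes added by widening are never visited (see the loop that follows, which only walks the original element count). A small sketch of that pattern with a plain array standing in for the vector; the conversion and the element counts are illustrative:

#include <cstdio>

int main() {
  const unsigned WidenNumElts = 8, MinElts = 5;  // widened vs original count
  int In[WidenNumElts] = {1, 2, 3, 4, 5, 0, 0, 0};
  double Out[WidenNumElts] = {};

  // Only the original MinElts lanes are processed; the extra lanes stay
  // untouched (undef in the real DAG).
  for (unsigned i = 0; i < MinElts; ++i)
    Out[i] = static_cast<double>(In[i]) * 0.5;  // per-element scalar op

  for (unsigned i = 0; i < MinElts; ++i)
    std::printf("lane %u = %g\n", i, Out[i]);
}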
unsigned MinElts = N->getValueType(0).getVectorNumElements(); for (unsigned i=0; i < MinElts; ++i) { - SDValue Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getVectorIdxConstant(i, DL)); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -3310,7 +3343,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); - SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other }; EVT InVT = InOp.getValueType(); EVT InEltVT = InVT.getVectorElementType(); @@ -3321,16 +3353,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); - SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other }; + std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}}; SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); SmallVector<SDValue, 32> OpChains; // Use the original element count so we don't do more scalar opts than // necessary. unsigned MinElts = N->getValueType(0).getVectorNumElements(); for (unsigned i=0; i < MinElts; ++i) { - NewOps[1] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getVectorIdxConstant(i, DL)); Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps); OpChains.push_back(Ops[i].getValue(1)); } @@ -3370,7 +3401,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { SmallVector<SDValue, 16> Ops; for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp, - DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + DAG.getVectorIdxConstant(i, DL)); switch (Opcode) { case ISD::ANY_EXTEND_VECTOR_INREG: Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val); @@ -3463,6 +3494,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: break; + case TargetLowering::TypeScalarizeScalableVector: + report_fatal_error("Scalarization of scalable vectors is not supported."); case TargetLowering::TypePromoteInteger: { // If the incoming type is a vector that is being promoted, then // we know that the elements are arranged differently and that we @@ -3492,6 +3525,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { } case TargetLowering::TypeSoftenFloat: case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeSoftPromoteHalf: case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: case TargetLowering::TypeScalarizeVector: @@ -3626,10 +3660,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { SDValue InOp = N->getOperand(i); if (InputWidened) InOp = GetWidenedVector(InOp); - for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + for (unsigned j = 0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getVectorIdxConstant(j, dl)); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < 
WidenNumElts; ++Idx) @@ -3666,11 +3699,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; - for (i=0; i < NumElts; ++i) - Ops[i] = - DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(IdxVal + i, dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + for (i = 0; i < NumElts; ++i) + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getVectorIdxConstant(IdxVal + i, dl)); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -3689,6 +3720,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); ISD::LoadExtType ExtType = LD->getExtensionType(); + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depend on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. A vector that does not have + // elements that are byte-sized must therefore be stored as an integer + // built out of the extracted vector elements. + if (!LD->getMemoryVT().isByteSized()) { + SDValue Value, NewChain; + std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); + ReplaceValueWith(SDValue(LD, 0), Value); + ReplaceValueWith(SDValue(LD, 1), NewChain); + return SDValue(); + } + SDValue Result; SmallVector<SDValue, 16> LdChain; // Chain for the series of load if (ExtType != ISD::NON_EXTLOAD) @@ -3877,8 +3922,7 @@ SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT, // Adjust Mask to the right number of elements. unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements(); if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(Mask), IdxTy); + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(Mask)); Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask, ZeroIdx); } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) { @@ -4144,12 +4188,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT)); SmallVector<SDValue, 8> Chains(NumElts); for (unsigned i = 0; i != NumElts; ++i) { - SDValue LHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue RHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getVectorIdxConstant(i, dl)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getVectorIdxConstant(i, dl)); Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, {Chain, LHSElem, RHSElem, CC}); @@ -4288,13 +4330,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && "We can't have the same type as we started with!"); if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) - InOp = DAG.getNode( - ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getVectorIdxConstant(0, DL)); else 
- InOp = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getVectorIdxConstant(0, DL)); break; } } @@ -4363,9 +4404,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { else Res = DAG.getNode(Opcode, dl, WideVT, InOp); } - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, VT, Res, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, + DAG.getVectorIdxConstant(0, dl)); } EVT InEltVT = InVT.getVectorElementType(); @@ -4376,9 +4416,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); SmallVector<SDValue, 32> OpChains; for (unsigned i=0; i < NumElts; ++i) { - NewOps[1] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getVectorIdxConstant(i, dl)); Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps); OpChains.push_back(Ops[i].getValue(1)); } @@ -4386,11 +4425,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { ReplaceValueWith(SDValue(N, 1), NewChain); } else { for (unsigned i = 0; i < NumElts; ++i) - Ops[i] = DAG.getNode( - Opcode, dl, EltVT, - DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + Ops[i] = DAG.getNode(Opcode, dl, EltVT, + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, + InOp, DAG.getVectorIdxConstant(i, dl))); } return DAG.getBuildVector(VT, dl, Ops); @@ -4411,9 +4448,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); - return DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getVectorIdxConstant(0, dl)); } } @@ -4430,7 +4466,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + DAG.getVectorIdxConstant(0, dl)); } } } @@ -4470,10 +4506,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { TargetLowering::TypeWidenVector && "Unexpected type action"); InOp = GetWidenedVector(InOp); - for (unsigned j=0; j < NumInElts; ++j) - Ops[Idx++] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + for (unsigned j = 0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getVectorIdxConstant(j, dl)); } return DAG.getBuildVector(VT, dl, Ops); } @@ -4630,9 +4665,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { EVT ResVT = EVT::getVectorVT(*DAG.getContext(), SVT.getVectorElementType(), VT.getVectorNumElements()); - SDValue CC = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, + DAG.getVectorIdxConstant(0, dl)); EVT OpVT = 
N->getOperand(0).getValueType(); ISD::NodeType ExtendCode = @@ -4657,12 +4691,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) { SmallVector<SDValue, 8> Chains(NumElts); for (unsigned i = 0; i != NumElts; ++i) { - SDValue LHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - SDValue RHSElem = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getVectorIdxConstant(i, dl)); + SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getVectorIdxConstant(i, dl)); Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, {Chain, LHSElem, RHSElem, CC}); @@ -4716,11 +4748,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { break; case ISD::VECREDUCE_FMAX: NeutralElem = DAG.getConstantFP( - std::numeric_limits<double>::infinity(), dl, ElemVT); + -std::numeric_limits<double>::infinity(), dl, ElemVT); break; case ISD::VECREDUCE_FMIN: NeutralElem = DAG.getConstantFP( - -std::numeric_limits<double>::infinity(), dl, ElemVT); + std::numeric_limits<double>::infinity(), dl, ElemVT); break; } @@ -4729,7 +4761,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { unsigned WideElts = WideVT.getVectorNumElements(); for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + DAG.getVectorIdxConstant(Idx, dl)); return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags()); } @@ -4748,9 +4780,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond, LeftIn, RightIn); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, VT, Select, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Select, + DAG.getVectorIdxConstant(0, DL)); } //===----------------------------------------------------------------------===// @@ -4836,7 +4867,6 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, SmallVectorImpl<SDValue> &LdOps, unsigned Start, unsigned End) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDLoc dl(LdOps[Start]); EVT LdTy = LdOps[Start].getValueType(); unsigned Width = VecTy.getSizeInBits(); @@ -4856,9 +4886,8 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); LdTy = NewLdTy; } - VecOp = DAG.getNode( - ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getVectorIdxConstant(Idx++, dl)); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } @@ -4879,19 +4908,19 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; - unsigned LdAlign = 
(!LD->isSimple()) ? 0 : Align; // Allow wider loads. + // Allow wider loads. + unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment(); // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - Align, MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. @@ -4934,7 +4963,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Increment), MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) { // Later code assumes the vector loads produced will be mergeable, so we @@ -4952,7 +4981,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Increment), MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(L.getValue(1)); } @@ -5029,7 +5058,6 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, // Load information SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); - unsigned Align = LD->getAlignment(); MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); AAMDNodes AAInfo = LD->getAAInfo(); @@ -5043,14 +5071,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, Align, MMOFlags, AAInfo); + LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - Align, MMOFlags, AAInfo); + LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -5069,7 +5097,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, // element type or scalar stores. 
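The VECREDUCE_FMAX/VECREDUCE_FMIN hunk a little earlier swaps the values used to fill the lanes that widening adds: the padding must be the identity of the reduction, -infinity for fmax and +infinity for fmin, so the extra lanes cannot change the result. A standalone check of that property, with plain doubles and made-up data:

#include <algorithm>
#include <cstdio>
#include <limits>

int main() {
  double Orig[3] = {1.5, -2.0, 0.25};
  const double NegInf = -std::numeric_limits<double>::infinity();

  // Widen 3 lanes to 4 and pad the new lane with the fmax identity.
  double Widened[4] = {Orig[0], Orig[1], Orig[2], NegInf};

  double MaxOrig = *std::max_element(Orig, Orig + 3);
  double MaxWide = *std::max_element(Widened, Widened + 4);
  std::printf("original max = %g, widened max = %g\n", MaxOrig, MaxWide);
}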
SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - unsigned Align = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); @@ -5093,12 +5120,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, if (NewVT.isVector()) { unsigned NumVTElts = NewVT.getVectorNumElements(); do { - SDValue EOp = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getVectorIdxConstant(Idx, dl)); StChain.push_back(DAG.getStore( Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Offset), MMOFlags, AAInfo)); + ST->getOriginalAlign(), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; @@ -5113,13 +5139,11 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, // Readjust index position based on new vector type. Idx = Idx * ValEltWidth / NewVTWidth; do { - SDValue EOp = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getConstant(Idx++, dl, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, + DAG.getVectorIdxConstant(Idx++, dl)); StChain.push_back(DAG.getStore( Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(Align, Offset), MMOFlags, AAInfo)); + ST->getOriginalAlign(), MMOFlags, AAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Increment); @@ -5137,7 +5161,6 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, // and then store it. Instead, we extract each element and then store it. 
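GenWidenVectorLoads and GenWidenVectorStores above move a widened value to or from memory in the largest legal pieces first, then progressively narrower ones, advancing a byte offset as they go; the real code picks each piece's type via FindMemType. A simplified sketch using power-of-two chunk sizes, which is an assumption made for illustration rather than the helper's actual selection logic:

#include <cstdio>

int main() {
  unsigned StWidthBits = 96;   // e.g. a widened value whose original store is 96 bits
  unsigned MaxChunkBits = 64;  // widest store assumed legal on the target
  unsigned Offset = 0;         // byte offset from the base pointer

  while (StWidthBits > 0) {
    unsigned Chunk = MaxChunkBits;
    while (Chunk > StWidthBits)
      Chunk /= 2;              // shrink to the widest piece that still fits
    std::printf("store %u bits at byte offset %u\n", Chunk, Offset);
    Offset += Chunk / 8;
    StWidthBits -= Chunk;
  }
}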
SDValue Chain = ST->getChain(); SDValue BasePtr = ST->getBasePtr(); - unsigned Align = ST->getAlignment(); MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags(); AAMDNodes AAInfo = ST->getAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); @@ -5157,21 +5180,19 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, EVT ValEltVT = ValVT.getVectorElementType(); unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); - SDValue EOp = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, - ST->getPointerInfo(), StEltVT, Align, - MMOFlags, AAInfo)); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getVectorIdxConstant(0, dl)); + StChain.push_back( + DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, + ST->getOriginalAlign(), MMOFlags, AAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getObjectPtrOffset(dl, BasePtr, Offset); - SDValue EOp = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getVectorIdxConstant(0, dl)); StChain.push_back(DAG.getTruncStore( Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), - StEltVT, MinAlign(Align, Offset), MMOFlags, AAInfo)); + StEltVT, ST->getOriginalAlign(), MMOFlags, AAInfo)); } } @@ -5206,9 +5227,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, } if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getVectorIdxConstant(0, dl)); // Fall back to extract and build. SmallVector<SDValue, 16> Ops(WidenNumElts); @@ -5216,9 +5236,8 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, unsigned MinNumElts = std::min(WidenNumElts, InNumElts); unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) - Ops[Idx] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getVectorIdxConstant(Idx, dl)); SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, EltVT) : DAG.getUNDEF(EltVT); diff --git a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 34660e3a48ec5..55fe26eb64cda 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -19,9 +19,13 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 7ee44c808fcbf..2902c96c7658c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -761,7 +761,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); - DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SDValue, Register> VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index ff806bdb822c2..72e68a5045c69 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -279,7 +279,7 @@ private: SUnit *NewNode = newSUnit(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.MarkDirty(); + Topo.AddSUnitWithoutPredecessors(NewNode); return NewNode; } @@ -289,7 +289,7 @@ private: SUnit *NewNode = Clone(N); // Update the topological ordering. if (NewNode->NodeNum >= NumSUnits) - Topo.MarkDirty(); + Topo.AddSUnitWithoutPredecessors(NewNode); return NewNode; } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 0e4d783e3505d..ce20d506586f0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; #define DEBUG_TYPE "pre-RA-sched" @@ -198,10 +199,10 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. 
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { - SDNode *Chain = nullptr; + SDValue Chain; unsigned NumOps = Node->getNumOperands(); if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) - Chain = Node->getOperand(NumOps-1).getNode(); + Chain = Node->getOperand(NumOps-1); if (!Chain) return; @@ -234,6 +235,9 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { unsigned UseCount = 0; for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); I != E && UseCount < 100; ++I, ++UseCount) { + if (I.getUse().getResNo() != Chain.getResNo()) + continue; + SDNode *User = *I; if (User == Node || !Visited.insert(User).second) continue; @@ -471,6 +475,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); + unsigned DefIdx = N->getOperand(i).getResNo(); if (isPassiveNode(OpN)) continue; // Not scheduled. SUnit *OpSU = &SUnits[OpN->getNodeId()]; assert(OpSU && "Node has no SUnit!"); @@ -505,7 +510,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { computeOperandLatency(OpN, N, i, Dep); - ST.adjustSchedDependency(OpSU, SU, Dep); + ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep); } if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { @@ -731,7 +736,7 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, - DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) { + DenseMap<SDValue, Register> &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -758,9 +763,9 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, - DenseMap<SDValue, unsigned> &VRBaseMap, + DenseMap<SDValue, Register> &VRBaseMap, SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders, - SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) { + SmallSet<Register, 8> &Seen, MachineInstr *NewInsn) { unsigned Order = N->getIROrder(); if (!Order || Seen.count(Order)) { // Process any valid SDDbgValues even if node does not have any order @@ -784,17 +789,17 @@ ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, } void ScheduleDAGSDNodes:: -EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, +EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, MachineBasicBlock::iterator InsertPos) { for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds if (I->getSUnit()->CopyDstRC) { // Copy to physical register. - DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + DenseMap<SUnit*, Register>::iterator VRI = VRBaseMap.find(I->getSUnit()); assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); // Find the destination physical register. 
- unsigned Reg = 0; + Register Reg; for (SUnit::const_succ_iterator II = SU->Succs.begin(), EE = SU->Succs.end(); II != EE; ++II) { if (II->isCtrl()) continue; // ignore chain preds @@ -826,17 +831,17 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { InstrEmitter Emitter(BB, InsertPos); - DenseMap<SDValue, unsigned> VRBaseMap; - DenseMap<SUnit*, unsigned> CopyVRBaseMap; + DenseMap<SDValue, Register> VRBaseMap; + DenseMap<SUnit*, Register> CopyVRBaseMap; SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; - SmallSet<unsigned, 8> Seen; + SmallSet<Register, 8> Seen; bool HasDbg = DAG->hasDebugValues(); // Emit a node, and determine where its first instruction is for debuginfo. // Zero, one, or multiple instructions can be created when emitting a node. auto EmitNode = [&](SDNode *Node, bool IsClone, bool IsCloned, - DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * { + DenseMap<SDValue, Register> &VRBaseMap) -> MachineInstr * { // Fetch instruction prior to this, or end() if nonexistant. auto GetPrevInsn = [&](MachineBasicBlock::iterator I) { if (I == BB->begin()) @@ -863,9 +868,14 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { MI = &*std::next(Before); } - if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues) + if (MI->isCandidateForCallSiteEntry() && + DAG->getTarget().Options.EmitCallSiteInfo) MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node)); + if (DAG->getNoMergeSiteInfo(Node)) { + MI->setFlag(MachineInstr::MIFlag::NoMerge); + } + return MI; }; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 183ce4b0652d0..8c28ce403c9bd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -184,7 +184,7 @@ class InstrItineraryData; void BuildSchedUnits(); void AddSchedEdges(); - void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, MachineBasicBlock::iterator InsertPos); }; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 313e07b5fdd6f..592c09c10fb08 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -38,6 +38,7 @@ #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SelectionDAGTargetInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -543,7 +544,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::ConstantPool: case ISD::TargetConstantPool: { const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); - ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getAlign().value()); ID.AddInteger(CP->getOffset()); if (CP->isMachineConstantPoolEntry()) CP->getMachineCPVal()->addSelectionDAGCSEId(ID); @@ -1000,12 +1001,12 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, return Node; } -unsigned SelectionDAG::getEVTAlignment(EVT VT) const { +Align SelectionDAG::getEVTAlign(EVT VT) const { Type *Ty = VT == MVT::iPTR ? 
PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return getDataLayout().getABITypeAlignment(Ty); + return getDataLayout().getABITypeAlign(Ty); } // EntryNode could meaningfully have debug info if we can find it... @@ -1167,15 +1168,21 @@ SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, } SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { - assert(!VT.isVector() && - "getZeroExtendInReg should use the vector element type instead of " - "the vector type!"); - if (Op.getValueType().getScalarType() == VT) return Op; - unsigned BitWidth = Op.getScalarValueSizeInBits(); - APInt Imm = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); - return getNode(ISD::AND, DL, Op.getValueType(), Op, - getConstant(Imm, DL, Op.getValueType())); + EVT OpVT = Op.getValueType(); + assert(VT.isInteger() && OpVT.isInteger() && + "Cannot getZeroExtendInReg FP types"); + assert(VT.isVector() == OpVT.isVector() && + "getZeroExtendInReg type should be vector iff the operand " + "type is vector!"); + assert((!VT.isVector() || + VT.getVectorElementCount() == OpVT.getVectorElementCount()) && + "Vector element counts must match in getZeroExtendInReg"); + assert(VT.bitsLE(OpVT) && "Not extending!"); + if (OpVT == VT) + return Op; + APInt Imm = APInt::getLowBitsSet(OpVT.getScalarSizeInBits(), + VT.getScalarSizeInBits()); + return getNode(ISD::AND, DL, OpVT, Op, getConstant(Imm, DL, OpVT)); } SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { @@ -1332,10 +1339,16 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL, SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL, bool LegalTypes) { + assert(VT.isInteger() && "Shift amount is not an integer type!"); EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes); return getConstant(Val, DL, ShiftVT); } +SDValue SelectionDAG::getVectorIdxConstant(uint64_t Val, const SDLoc &DL, + bool isTarget) { + return getConstant(Val, DL, TLI->getVectorIdxTy(getDataLayout()), isTarget); +} + SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT, bool isTarget) { return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); @@ -1381,7 +1394,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, else if (EltVT == MVT::f64) return getConstantFP(APFloat(Val), DL, VT, isTarget); else if (EltVT == MVT::f80 || EltVT == MVT::f128 || EltVT == MVT::ppcf128 || - EltVT == MVT::f16) { + EltVT == MVT::f16 || EltVT == MVT::bf16) { bool Ignored; APFloat APF = APFloat(Val); APF.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, @@ -1459,19 +1472,18 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, } SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, - unsigned Alignment, int Offset, - bool isTarget, - unsigned TargetFlags) { + MaybeAlign Alignment, int Offset, + bool isTarget, unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); - if (Alignment == 0) + if (!Alignment) Alignment = shouldOptForSize() - ? getDataLayout().getABITypeAlignment(C->getType()) - : getDataLayout().getPrefTypeAlignment(C->getType()); + ? getDataLayout().getABITypeAlign(C->getType()) + : getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); - ID.AddInteger(Alignment); + ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); ID.AddPointer(C); ID.AddInteger(TargetFlags); @@ -1479,25 +1491,26 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); - return SDValue(N, 0); + SDValue V = SDValue(N, 0); + NewSDValueDbgMsg(V, "Creating new constant pool: ", this); + return V; } SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, - unsigned Alignment, int Offset, - bool isTarget, - unsigned TargetFlags) { + MaybeAlign Alignment, int Offset, + bool isTarget, unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); - if (Alignment == 0) - Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); + if (!Alignment) + Alignment = getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), None); - ID.AddInteger(Alignment); + ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); C->addSelectionDAGCSEId(ID); ID.AddInteger(TargetFlags); @@ -1505,7 +1518,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, if (SDNode *E = FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, Alignment, + auto *N = newSDNode<ConstantPoolSDNode>(isTarget, C, VT, Offset, *Alignment, TargetFlags); CSEMap.InsertNode(N, IP); InsertNode(N); @@ -1861,9 +1874,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, } SDValue SelectionDAG::getSrcValue(const Value *V) { - assert((!V || V->getType()->isPointerTy()) && - "SrcValue is not a pointer?"); - FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); ID.AddPointer(V); @@ -1921,6 +1931,10 @@ SDValue SelectionDAG::getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, return SDValue(N, 0); } +SDValue SelectionDAG::getFreeze(SDValue V) { + return getNode(ISD::FREEZE, SDLoc(V), V.getValueType(), V); +} + /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { @@ -1979,28 +1993,54 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) { MachinePointerInfo(VD)); } -SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { - MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - unsigned ByteSize = VT.getStoreSize(); +Align SelectionDAG::getReducedAlign(EVT VT, bool UseABI) { + const DataLayout &DL = getDataLayout(); Type *Ty = VT.getTypeForEVT(*getContext()); - unsigned StackAlign = - std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); + Align RedAlign = UseABI ? 
DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); + + if (TLI->isTypeLegal(VT) || !VT.isVector()) + return RedAlign; + + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + const Align StackAlign = TFI->getStackAlign(); + + // See if we can choose a smaller ABI alignment in cases where it's an + // illegal vector type that will get broken down. + if (RedAlign > StackAlign) { + EVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + TLI->getVectorTypeBreakdown(*getContext(), VT, IntermediateVT, + NumIntermediates, RegisterVT); + Ty = IntermediateVT.getTypeForEVT(*getContext()); + Align RedAlign2 = UseABI ? DL.getABITypeAlign(Ty) : DL.getPrefTypeAlign(Ty); + if (RedAlign2 < RedAlign) + RedAlign = RedAlign2; + } + + return RedAlign; +} - int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false); +SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { + MachineFrameInfo &MFI = MF->getFrameInfo(); + int FrameIdx = MFI.CreateStackObject(Bytes, Alignment, false); return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { + Type *Ty = VT.getTypeForEVT(*getContext()); + Align StackAlign = + std::max(getDataLayout().getPrefTypeAlign(Ty), Align(minAlign)); + return CreateStackTemporary(VT.getStoreSize(), StackAlign); +} + SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { - unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); + TypeSize Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); const DataLayout &DL = getDataLayout(); - unsigned Align = - std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); - - MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - int FrameIdx = MFI.CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); + Align Align = std::max(DL.getPrefTypeAlign(Ty1), DL.getPrefTypeAlign(Ty2)); + return CreateStackTemporary(Bytes, Align); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, @@ -2179,21 +2219,16 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, const APInt &DemandedElts) { switch (V.getOpcode()) { default: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); break; case ISD::Constant: { - auto *CV = cast<ConstantSDNode>(V.getNode()); - assert(CV && "Const value should be ConstSDNode."); - const APInt &CVal = CV->getAPIntValue(); + const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue(); APInt NewVal = CVal & DemandedBits; if (NewVal != CVal) return getConstant(NewVal, SDLoc(V), V.getValueType()); break; } - case ISD::OR: - case ISD::XOR: - case ISD::SIGN_EXTEND_INREG: - return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, - *this, 0); case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) @@ -2224,19 +2259,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, } break; } - case ISD::ANY_EXTEND: { - SDValue Src = V.getOperand(0); - unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); - // Being conservative here - only peek through if we only demand bits in the - // non-extended source (even though the extended bits are technically - // undef). 
- if (DemandedBits.getActiveBits() > SrcBitWidth) - break; - APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth); - if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits)) - return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); - break; - } } return SDValue(); } @@ -2253,11 +2275,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { /// for bits that V cannot have. bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask, unsigned Depth) const { - EVT VT = V.getValueType(); - APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) - : APInt(1, 1); - return MaskedValueIsZero(V, Mask, DemandedElts, Depth); + return Mask.isSubsetOf(computeKnownBits(V, Depth).Zero); } /// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in @@ -2276,15 +2294,42 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, } /// isSplatValue - Return true if the vector V has the same value -/// across all DemandedElts. +/// across all DemandedElts. For scalable vectors it does not make +/// sense to specify which elements are demanded or undefined, therefore +/// they are simply ignored. bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts) { - if (!DemandedElts) - return false; // No demanded elts, better to assume we don't know anything. - EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); + if (!VT.isScalableVector() && !DemandedElts) + return false; // No demanded elts, better to assume we don't know anything. + + // Deal with some common cases here that work for both fixed and scalable + // vector types. + switch (V.getOpcode()) { + case ISD::SPLAT_VECTOR: + return true; + case ISD::ADD: + case ISD::SUB: + case ISD::AND: { + APInt UndefLHS, UndefRHS; + SDValue LHS = V.getOperand(0); + SDValue RHS = V.getOperand(1); + if (isSplatValue(LHS, DemandedElts, UndefLHS) && + isSplatValue(RHS, DemandedElts, UndefRHS)) { + UndefElts = UndefLHS | UndefRHS; + return true; + } + break; + } + } + + // We don't support other cases than those above for scalable vectors at + // the moment. + if (VT.isScalableVector()) + return false; + unsigned NumElts = VT.getVectorNumElements(); assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); UndefElts = APInt::getNullValue(NumElts); @@ -2326,30 +2371,14 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, return true; } case ISD::EXTRACT_SUBVECTOR: { + // Offset the demanded elts by the subvector index. SDValue Src = V.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(V.getOperand(1)); + uint64_t Idx = V.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. 
- uint64_t Idx = SubIdx->getZExtValue(); - APInt UndefSrcElts; - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - if (isSplatValue(Src, DemandedSrc, UndefSrcElts)) { - UndefElts = UndefSrcElts.extractBits(NumElts, Idx); - return true; - } - } - break; - } - case ISD::ADD: - case ISD::SUB: - case ISD::AND: { - APInt UndefLHS, UndefRHS; - SDValue LHS = V.getOperand(0); - SDValue RHS = V.getOperand(1); - if (isSplatValue(LHS, DemandedElts, UndefLHS) && - isSplatValue(RHS, DemandedElts, UndefRHS)) { - UndefElts = UndefLHS | UndefRHS; + APInt UndefSrcElts; + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + if (isSplatValue(Src, DemandedSrcElts, UndefSrcElts)) { + UndefElts = UndefSrcElts.extractBits(NumElts, Idx); return true; } break; @@ -2363,10 +2392,13 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); - unsigned NumElts = VT.getVectorNumElements(); APInt UndefElts; - APInt DemandedElts = APInt::getAllOnesValue(NumElts); + APInt DemandedElts; + + // For now we don't support this with scalable vectors. + if (!VT.isScalableVector()) + DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); return isSplatValue(V, DemandedElts, UndefElts) && (AllowUndefs || !UndefElts); } @@ -2379,19 +2411,35 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { switch (Opcode) { default: { APInt UndefElts; - APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + APInt DemandedElts; + + if (!VT.isScalableVector()) + DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + if (isSplatValue(V, DemandedElts, UndefElts)) { - // Handle case where all demanded elements are UNDEF. - if (DemandedElts.isSubsetOf(UndefElts)) { + if (VT.isScalableVector()) { + // DemandedElts and UndefElts are ignored for scalable vectors, since + // the only supported cases are SPLAT_VECTOR nodes. SplatIdx = 0; - return getUNDEF(VT); + } else { + // Handle case where all demanded elements are UNDEF. + if (DemandedElts.isSubsetOf(UndefElts)) { + SplatIdx = 0; + return getUNDEF(VT); + } + SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); } - SplatIdx = (UndefElts & DemandedElts).countTrailingOnes(); return V; } break; } + case ISD::SPLAT_VECTOR: + SplatIdx = 0; + return V; case ISD::VECTOR_SHUFFLE: { + if (VT.isScalableVector()) + return SDValue(); + // Check if this is a shuffle node doing a splat. // TODO - remove this and rely purely on SelectionDAG::isSplatValue, // getTargetVShiftNode currently struggles without the splat source. @@ -2413,14 +2461,16 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx)) return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), SrcVector.getValueType().getScalarType(), SrcVector, - getIntPtrConstant(SplatIdx, SDLoc(V))); + getVectorIdxConstant(SplatIdx, SDLoc(V))); return SDValue(); } -/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that -/// is less than the element bit-width of the shift node, return it. 
-static const APInt *getValidShiftAmountConstant(SDValue V, - const APInt &DemandedElts) { +const APInt * +SelectionDAG::getValidShiftAmountConstant(SDValue V, + const APInt &DemandedElts) const { + assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || + V.getOpcode() == ISD::SRA) && + "Unknown shift node"); unsigned BitWidth = V.getScalarValueSizeInBits(); if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1), DemandedElts)) { // Shifting more than the bitwidth is not valid. @@ -2431,10 +2481,13 @@ static const APInt *getValidShiftAmountConstant(SDValue V, return nullptr; } -/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less -/// than the element bit-width of the shift node, return the minimum value. -static const APInt * -getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { +const APInt *SelectionDAG::getValidMinimumShiftAmountConstant( + SDValue V, const APInt &DemandedElts) const { + assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || + V.getOpcode() == ISD::SRA) && + "Unknown shift node"); + if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts)) + return ValidAmt; unsigned BitWidth = V.getScalarValueSizeInBits(); auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); if (!BV) @@ -2457,10 +2510,13 @@ getValidMinimumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { return MinShAmt; } -/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less -/// than the element bit-width of the shift node, return the maximum value. -static const APInt * -getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { +const APInt *SelectionDAG::getValidMaximumShiftAmountConstant( + SDValue V, const APInt &DemandedElts) const { + assert((V.getOpcode() == ISD::SHL || V.getOpcode() == ISD::SRL || + V.getOpcode() == ISD::SRA) && + "Unknown shift node"); + if (const APInt *ValidAmt = getValidShiftAmountConstant(V, DemandedElts)) + return ValidAmt; unsigned BitWidth = V.getScalarValueSizeInBits(); auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); if (!BV) @@ -2488,6 +2544,14 @@ getValidMaximumShiftAmountConstant(SDValue V, const APInt &DemandedElts) { /// every vector element. KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); + + // TOOD: Until we have a plan for how to represent demanded elements for + // scalable vectors, we can just bail out for now. + if (Op.getValueType().isScalableVector()) { + unsigned BitWidth = Op.getScalarValueSizeInBits(); + return KnownBits(BitWidth); + } + APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -2503,6 +2567,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, KnownBits Known(BitWidth); // Don't know anything. + // TOOD: Until we have a plan for how to represent demanded elements for + // scalable vectors, we can just bail out for now. + if (Op.getValueType().isScalableVector()) + return Known; + if (auto *C = dyn_cast<ConstantSDNode>(Op)) { // We know all of the bits for a constant! Known.One = C->getAPIntValue(); @@ -2622,52 +2691,40 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::INSERT_SUBVECTOR: { - // If we know the element index, demand any elements from the subvector and - // the remainder from the src its inserted into, otherwise demand them all. 
+ // Demand any elements from the subvector and the remainder from the src its + // inserted into. SDValue Src = Op.getOperand(0); SDValue Sub = Op.getOperand(1); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + uint64_t Idx = Op.getConstantOperandVal(2); unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); - if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { - Known.One.setAllBits(); - Known.Zero.setAllBits(); - uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); - if (!!DemandedSubElts) { - Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1); - if (Known.isUnknown()) - break; // early-out. - } - APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); - APInt DemandedSrcElts = DemandedElts & ~SubMask; - if (!!DemandedSrcElts) { - Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; - } - } else { - Known = computeKnownBits(Sub, Depth + 1); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + + Known.One.setAllBits(); + Known.Zero.setAllBits(); + if (!!DemandedSubElts) { + Known = computeKnownBits(Sub, DemandedSubElts, Depth + 1); if (Known.isUnknown()) break; // early-out. - Known2 = computeKnownBits(Src, Depth + 1); + } + if (!!DemandedSrcElts) { + Known2 = computeKnownBits(Src, DemandedSrcElts, Depth + 1); Known.One &= Known2.One; Known.Zero &= Known2.Zero; } break; } case ISD::EXTRACT_SUBVECTOR: { - // If we know the element index, just demand that subvector elements, - // otherwise demand them all. + // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // Bail until we can represent demanded elements for scalable vectors. + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. - uint64_t Idx = SubIdx->getZExtValue(); - DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - } - Known = computeKnownBits(Src, DemandedSrc, Depth + 1); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + Known = computeKnownBits(Src, DemandedSrcElts, Depth + 1); break; } case ISD::SCALAR_TO_VECTOR: { @@ -2753,35 +2810,23 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::AND: - // If either the LHS or the RHS are Zero, the result is zero. Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - // Output known-1 bits are only known if set in both the LHS & RHS. - Known.One &= Known2.One; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - Known.Zero |= Known2.Zero; + Known &= Known2; break; case ISD::OR: Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - // Output known-0 bits are only known if clear in both the LHS & RHS. - Known.Zero &= Known2.Zero; - // Output known-1 are known to be set if set in either the LHS | RHS. 
- Known.One |= Known2.One; + Known |= Known2; break; - case ISD::XOR: { + case ISD::XOR: Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); - Known.Zero = KnownZeroOut; + Known ^= Known2; break; - } case ISD::MUL: { Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); @@ -3075,12 +3120,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); - Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + Known = Known.zext(BitWidth); break; } case ISD::ZERO_EXTEND: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + Known = Known.zext(BitWidth); break; } case ISD::SIGN_EXTEND_VECTOR_INREG: { @@ -3099,9 +3144,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = Known.sext(BitWidth); break; } + case ISD::ANY_EXTEND_VECTOR_INREG: { + EVT InVT = Op.getOperand(0).getValueType(); + APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements()); + Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); + Known = Known.anyext(BitWidth); + break; + } case ISD::ANY_EXTEND: { - Known = computeKnownBits(Op.getOperand(0), Depth+1); - Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known = Known.anyext(BitWidth); break; } case ISD::TRUNCATE: { @@ -3117,6 +3169,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One &= (~Known.Zero); break; } + case ISD::AssertAlign: { + unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign()); + assert(LogOfAlign != 0); + // If a node is guaranteed to be aligned, set low zero bits accordingly as + // well as clearing one bits. + Known.Zero.setLowBits(LogOfAlign); + Known.One.clearLowBits(LogOfAlign); + break; + } case ISD::FGETSIGN: // All bits are zero except the low bit. Known.Zero.setBitsFrom(1); @@ -3134,6 +3195,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, LLVM_FALLTHROUGH; case ISD::SUB: case ISD::SUBC: { + assert(Op.getResNo() == 0 && + "We only compute knownbits for the difference here."); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, @@ -3245,57 +3309,51 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, EVT VecVT = InVec.getValueType(); const unsigned EltBitWidth = VecVT.getScalarSizeInBits(); const unsigned NumSrcElts = VecVT.getVectorNumElements(); + // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know // anything about the extended bits. 
if (BitWidth > EltBitWidth) Known = Known.trunc(EltBitWidth); - ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); - if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) { - // If we know the element index, just demand that vector element. - unsigned Idx = ConstEltNo->getZExtValue(); - APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx); - Known = computeKnownBits(InVec, DemandedElt, Depth + 1); - } else { - // Unknown element index, so ignore DemandedElts and demand them all. - Known = computeKnownBits(InVec, Depth + 1); - } + + // If we know the element index, just demand that vector element, else for + // an unknown element index, ignore DemandedElts and demand them all. + APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) + DemandedSrcElts = + APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); + + Known = computeKnownBits(InVec, DemandedSrcElts, Depth + 1); if (BitWidth > EltBitWidth) - Known = Known.zext(BitWidth, false /* => any extend */); + Known = Known.anyext(BitWidth); break; } case ISD::INSERT_VECTOR_ELT: { + // If we know the element index, split the demand between the + // source vector and the inserted element, otherwise assume we need + // the original demanded vector elements and the value. SDValue InVec = Op.getOperand(0); SDValue InVal = Op.getOperand(1); SDValue EltNo = Op.getOperand(2); - - ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + bool DemandedVal = true; + APInt DemandedVecElts = DemandedElts; + auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo); if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { - // If we know the element index, split the demand between the - // source vector and the inserted element. - Known.Zero = Known.One = APInt::getAllOnesValue(BitWidth); unsigned EltIdx = CEltNo->getZExtValue(); - - // If we demand the inserted element then add its common known bits. - if (DemandedElts[EltIdx]) { - Known2 = computeKnownBits(InVal, Depth + 1); - Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); - Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); - } - - // If we demand the source vector then add its common known bits, ensuring - // that we don't demand the inserted element. - APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx)); - if (!!VectorElts) { - Known2 = computeKnownBits(InVec, VectorElts, Depth + 1); - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; - } - } else { - // Unknown element index, so ignore DemandedElts and demand them all. 
- Known = computeKnownBits(InVec, Depth + 1); + DemandedVal = !!DemandedElts[EltIdx]; + DemandedVecElts.clearBit(EltIdx); + } + Known.One.setAllBits(); + Known.Zero.setAllBits(); + if (DemandedVal) { Known2 = computeKnownBits(InVal, Depth + 1); - Known.One &= Known2.One.zextOrTrunc(Known.One.getBitWidth()); - Known.Zero &= Known2.Zero.zextOrTrunc(Known.Zero.getBitWidth()); + Known.One &= Known2.One.zextOrTrunc(BitWidth); + Known.Zero &= Known2.Zero.zextOrTrunc(BitWidth); + } + if (!!DemandedVecElts) { + Known2 = computeKnownBits(InVec, DemandedVecElts, Depth + 1); + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; } break; } @@ -3399,7 +3457,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } case ISD::FrameIndex: case ISD::TargetFrameIndex: - TLI->computeKnownBitsForFrameIndex(Op, Known, DemandedElts, *this, Depth); + TLI->computeKnownBitsForFrameIndex(cast<FrameIndexSDNode>(Op)->getIndex(), + Known, getMachineFunction()); break; default: @@ -3492,6 +3551,11 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); + + // TODO: Assume we don't know anything for now. + if (VT.isScalableVector()) + return 1; + APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -3515,7 +3579,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. - if (!DemandedElts) + if (!DemandedElts || VT.isScalableVector()) return 1; // No demanded elts, better to assume we don't know anything. unsigned Opcode = Op.getOpcode(); @@ -3535,7 +3599,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, continue; SDValue SrcOp = Op.getOperand(i); - Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1); + Tmp2 = ComputeNumSignBits(SrcOp, Depth + 1); // BUILD_VECTOR can implicitly truncate sources, we must handle this. if (SrcOp.getValueSizeInBits() != VTBits) { @@ -3646,23 +3710,17 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::SRA: Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); // SRA X, C -> adds C sign bits. - if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) - Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); - else if (const APInt *ShAmt = - getValidMinimumShiftAmountConstant(Op, DemandedElts)) + if (const APInt *ShAmt = + getValidMinimumShiftAmountConstant(Op, DemandedElts)) Tmp = std::min<uint64_t>(Tmp + ShAmt->getZExtValue(), VTBits); return Tmp; case ISD::SHL: - if (const APInt *ShAmt = getValidShiftAmountConstant(Op, DemandedElts)) { + if (const APInt *ShAmt = + getValidMaximumShiftAmountConstant(Op, DemandedElts)) { // shl destroys sign bits, ensure it doesn't shift out all sign bits. Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (ShAmt->ult(Tmp)) return Tmp - ShAmt->getZExtValue(); - } else if (const APInt *ShAmt = - getValidMaximumShiftAmountConstant(Op, DemandedElts)) { - Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); - if (ShAmt->ult(Tmp)) - return Tmp - ShAmt->getZExtValue(); } break; case ISD::AND: @@ -3712,18 +3770,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } // Fallback - just get the minimum number of sign bits of the operands. 
- Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); return std::min(Tmp, Tmp2); } case ISD::UMIN: case ISD::UMAX: - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); if (Tmp == 1) return 1; // Early out. - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); return std::min(Tmp, Tmp2); case ISD::SADDO: case ISD::UADDO: @@ -3753,7 +3811,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } case ISD::ROTL: case ISD::ROTR: - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + + // If we're rotating an 0/-1 value, then it stays an 0/-1 value. + if (Tmp == VTBits) + return VTBits; + + if (ConstantSDNode *C = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) { unsigned RotAmt = C->getAPIntValue().urem(VTBits); // Handle rotate right by N like a rotate left by 32-N. @@ -3762,7 +3827,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If we aren't rotating out all of the known-in sign bits, return the // number that are left. This handles rotl(sext(x), 1) for example. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp > (RotAmt + 1)) return (Tmp - RotAmt); } break; @@ -3770,13 +3834,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::ADDC: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp == 1) return 1; // Early out. + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Tmp == 1) return 1; // Early out. // Special case decrementing a value (ADD X, -1): - if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (ConstantSDNode *CRHS = + isConstOrConstSplat(Op.getOperand(1), DemandedElts)) if (CRHS->isAllOnesValue()) { - KnownBits Known = computeKnownBits(Op.getOperand(0), Depth+1); + KnownBits Known = + computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. @@ -3789,18 +3855,19 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); - if (Tmp2 == 1) return 1; - return std::min(Tmp, Tmp2)-1; - + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (Tmp2 == 1) return 1; // Early out. + return std::min(Tmp, Tmp2) - 1; case ISD::SUB: - Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); - if (Tmp2 == 1) return 1; + Tmp2 = ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); + if (Tmp2 == 1) return 1; // Early out. // Handle NEG. 
- if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) + if (ConstantSDNode *CLHS = + isConstOrConstSplat(Op.getOperand(0), DemandedElts)) if (CLHS->isNullValue()) { - KnownBits Known = computeKnownBits(Op.getOperand(1), Depth+1); + KnownBits Known = + computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((Known.Zero | 1).isAllOnesValue()) @@ -3816,9 +3883,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); - if (Tmp == 1) return 1; // Early out. - return std::min(Tmp, Tmp2)-1; + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2) - 1; case ISD::MUL: { // The output of the Mul can be at most twice the valid bits in the inputs. unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); @@ -3853,39 +3920,32 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } case ISD::INSERT_VECTOR_ELT: { + // If we know the element index, split the demand between the + // source vector and the inserted element, otherwise assume we need + // the original demanded vector elements and the value. SDValue InVec = Op.getOperand(0); SDValue InVal = Op.getOperand(1); SDValue EltNo = Op.getOperand(2); - - ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + bool DemandedVal = true; + APInt DemandedVecElts = DemandedElts; + auto *CEltNo = dyn_cast<ConstantSDNode>(EltNo); if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { - // If we know the element index, split the demand between the - // source vector and the inserted element. unsigned EltIdx = CEltNo->getZExtValue(); - - // If we demand the inserted element then get its sign bits. - Tmp = std::numeric_limits<unsigned>::max(); - if (DemandedElts[EltIdx]) { - // TODO - handle implicit truncation of inserted elements. - if (InVal.getScalarValueSizeInBits() != VTBits) - break; - Tmp = ComputeNumSignBits(InVal, Depth + 1); - } - - // If we demand the source vector then get its sign bits, and determine - // the minimum. - APInt VectorElts = DemandedElts; - VectorElts.clearBit(EltIdx); - if (!!VectorElts) { - Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1); - Tmp = std::min(Tmp, Tmp2); - } - } else { - // Unknown element index, so ignore DemandedElts and demand them all. - Tmp = ComputeNumSignBits(InVec, Depth + 1); + DemandedVal = !!DemandedElts[EltIdx]; + DemandedVecElts.clearBit(EltIdx); + } + Tmp = std::numeric_limits<unsigned>::max(); + if (DemandedVal) { + // TODO - handle implicit truncation of inserted elements. + if (InVal.getScalarValueSizeInBits() != VTBits) + break; Tmp2 = ComputeNumSignBits(InVal, Depth + 1); Tmp = std::min(Tmp, Tmp2); } + if (!!DemandedVecElts) { + Tmp2 = ComputeNumSignBits(InVec, DemandedVecElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } @@ -3906,7 +3966,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If we know the element index, just demand that vector element, else for // an unknown element index, ignore DemandedElts and demand them all. 
APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); - ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); @@ -3914,18 +3974,15 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); } case ISD::EXTRACT_SUBVECTOR: { - // If we know the element index, just demand that subvector elements, - // otherwise demand them all. + // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // Bail until we can represent demanded elements for scalable vectors. + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. - uint64_t Idx = SubIdx->getZExtValue(); - DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - } - return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); } case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded @@ -3946,35 +4003,26 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } case ISD::INSERT_SUBVECTOR: { - // If we know the element index, demand any elements from the subvector and - // the remainder from the src its inserted into, otherwise demand them all. + // Demand any elements from the subvector and the remainder from the src its + // inserted into. SDValue Src = Op.getOperand(0); SDValue Sub = Op.getOperand(1); - auto *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + uint64_t Idx = Op.getConstantOperandVal(2); unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); - if (SubIdx && SubIdx->getAPIntValue().ule(NumElts - NumSubElts)) { - Tmp = std::numeric_limits<unsigned>::max(); - uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); - if (!!DemandedSubElts) { - Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); - if (Tmp == 1) return 1; // early-out - } - APInt SubMask = APInt::getBitsSet(NumElts, Idx, Idx + NumSubElts); - APInt DemandedSrcElts = DemandedElts & ~SubMask; - if (!!DemandedSrcElts) { - Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); - Tmp = std::min(Tmp, Tmp2); - } - assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); - return Tmp; - } + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); - // Not able to determine the index so just assume worst case. 
- Tmp = ComputeNumSignBits(Sub, Depth + 1); - if (Tmp == 1) return 1; // early-out - Tmp2 = ComputeNumSignBits(Src, Depth + 1); - Tmp = std::min(Tmp, Tmp2); + Tmp = std::numeric_limits<unsigned>::max(); + if (!!DemandedSubElts) { + Tmp = ComputeNumSignBits(Sub, DemandedSubElts, Depth + 1); + if (Tmp == 1) + return 1; // early-out + } + if (!!DemandedSrcElts) { + Tmp2 = ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } @@ -4052,13 +4100,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return FirstAnswer; } - // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // Okay, we know that the sign bit in Mask is set. Use CLO to determine // the number of identical bits in the top of the input value. - Mask = ~Mask; Mask <<= Mask.getBitWidth()-VTBits; - // Return # leading zeros. We use 'min' here in case Val was zero before - // shifting. We don't want to return '64' as for an i32 "0". - return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); + return std::max(FirstAnswer, Mask.countLeadingOnes()); } bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { @@ -4109,6 +4154,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FFLOOR: case ISD::FCEIL: case ISD::FROUND: + case ISD::FROUNDEVEN: case ISD::FRINT: case ISD::FNEARBYINT: { if (SNaN) @@ -4249,6 +4295,8 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT, SelectionDAG &DAG) { int NumOps = Ops.size(); assert(NumOps != 0 && "Can't build an empty vector!"); + assert(!VT.isScalableVector() && + "BUILD_VECTOR cannot be used with scalable types"); assert(VT.getVectorNumElements() == (unsigned)NumOps && "Incorrect element count in BUILD_VECTOR!"); @@ -4287,8 +4335,8 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, return Ops[0].getValueType() == Op.getValueType(); }) && "Concatenation of vectors with inconsistent value types!"); - assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) == - VT.getVectorNumElements() && + assert((Ops[0].getValueType().getVectorElementCount() * Ops.size()) == + VT.getVectorElementCount() && "Incorrect element count in vector concatenation!"); if (Ops.size() == 1) @@ -4305,11 +4353,10 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, bool IsIdentity = true; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { SDValue Op = Ops[i]; - unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements(); + unsigned IdentityIndex = i * Op.getValueType().getVectorMinNumElements(); if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR || Op.getOperand(0).getValueType() != VT || (IdentitySrc && Op.getOperand(0) != IdentitySrc) || - !isa<ConstantSDNode>(Op.getOperand(1)) || Op.getConstantOperandVal(1) != IdentityIndex) { IsIdentity = false; break; @@ -4323,6 +4370,11 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, return IdentitySrc; } + // The code below this point is only designed to work for fixed width + // vectors, so we bail out for now. + if (VT.isScalableVector()) + return SDValue(); + // A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be // simplified to one big BUILD_VECTOR. // FIXME: Add support for SCALAR_TO_VECTOR as well. @@ -4508,7 +4560,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // FIXME need to be more flexible about rounding mode. 
(void)V.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored); - return getConstant(V.bitcastToAPInt(), DL, VT); + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); } } } @@ -4553,6 +4605,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, unsigned OpOpcode = Operand.getNode()->getOpcode(); switch (Opcode) { + case ISD::FREEZE: + assert(VT == Operand.getValueType() && "Unexpected VT!"); + break; case ISD::TokenFactor: case ISD::MERGE_VALUES: case ISD::CONCAT_VECTORS: @@ -4597,8 +4652,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid sext node, dst < src!"); @@ -4616,8 +4671,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid zext node, dst < src!"); @@ -4635,8 +4690,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop extension assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsLT(VT) && "Invalid anyext node, dst < src!"); @@ -4665,8 +4720,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "type is vector!"); if (Operand.getValueType() == VT) return Operand; // noop truncate assert((!VT.isVector() || - VT.getVectorNumElements() == - Operand.getValueType().getVectorNumElements()) && + VT.getVectorElementCount() == + Operand.getValueType().getVectorElementCount()) && "Vector element count mismatch!"); assert(Operand.getValueType().bitsGT(VT) && "Invalid truncate node, src < dst!"); @@ -4753,6 +4808,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) return getNode(ISD::FABS, DL, VT, Operand.getOperand(0)); break; + case ISD::VSCALE: + assert(VT == Operand.getValueType() && "Unexpected VT!"); + break; } SDNode *N; @@ -4824,17 +4882,6 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, return llvm::None; } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, - EVT VT, const ConstantSDNode *C1, - const ConstantSDNode *C2) { - if (C1->isOpaque() || C2->isOpaque()) - return SDValue(); - if (Optional<APInt> Folded = - FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue())) - return getConstant(Folded.getValue(), DL, VT); - return SDValue(); -} - SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, const GlobalAddressSDNode *GA, const SDNode *N2) { @@ -4881,20 +4928,37 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { } SDValue SelectionDAG::FoldConstantArithmetic(unsigned 
Opcode, const SDLoc &DL, - EVT VT, SDNode *N1, SDNode *N2) { + EVT VT, ArrayRef<SDValue> Ops) { // If the opcode is a target-specific ISD node, there's nothing we can // do here and the operand rules may not line up with the below, so // bail early. if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); - if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)})) + // For now, the array Ops should only contain two values. + // This enforcement will be removed once this function is merged with + // FoldConstantVectorArithmetic + if (Ops.size() != 2) + return SDValue(); + + if (isUndef(Opcode, Ops)) return getUNDEF(VT); + SDNode *N1 = Ops[0].getNode(); + SDNode *N2 = Ops[1].getNode(); + // Handle the case of two scalars. if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { - SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2); + if (C1->isOpaque() || C2->isOpaque()) + return SDValue(); + + Optional<APInt> FoldAttempt = + FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); + if (!FoldAttempt) + return SDValue(); + + SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); assert((!Folded || !VT.isVector()) && "Can't fold vectors ops with scalar operands"); return Folded; @@ -4908,8 +4972,14 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) return FoldSymbolOffset(Opcode, VT, GA, N1); - // For vectors, extract each constant element and fold them individually. - // Either input may be an undef value. + // TODO: All the folds below are performed lane-by-lane and assume a fixed + // vector width, however we should be able to do constant folds involving + // splat vector nodes too. + if (VT.isScalableVector()) + return SDValue(); + + // For fixed width vectors, extract each constant element and fold them + // individually. Either input may be an undef value. auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); if (!BV1 && !N1->isUndef()) return SDValue(); @@ -4985,6 +5055,13 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, if (!VT.isVector()) return SDValue(); + // TODO: All the folds below are performed lane-by-lane and assume a fixed + // vector width, however we should be able to do constant folds involving + // splat vector nodes too. + if (VT.isScalableVector()) + return SDValue(); + + // From this point onwards all vectors are assumed to be fixed width. unsigned NumElts = VT.getVectorNumElements(); auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { @@ -5107,8 +5184,13 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, } switch (Opcode) { - case ISD::FADD: case ISD::FSUB: + // -0.0 - undef --> undef (consistent with "fneg undef") + if (N1CFP && N1CFP->getValueAPF().isNegZero() && N2.isUndef()) + return getUNDEF(VT); + LLVM_FALLTHROUGH; + + case ISD::FADD: case ISD::FMUL: case ISD::FDIV: case ISD::FREM: @@ -5122,6 +5204,34 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, return SDValue(); } +SDValue SelectionDAG::getAssertAlign(const SDLoc &DL, SDValue Val, Align A) { + assert(Val.getValueType().isInteger() && "Invalid AssertAlign!"); + + // There's no need to assert on a byte-aligned pointer. All pointers are at + // least byte aligned. 
+ if (A == Align(1)) + return Val; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::AssertAlign, getVTList(Val.getValueType()), {Val}); + ID.AddInteger(A.value()); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<AssertAlignSDNode>(DL.getIROrder(), DL.getDebugLoc(), + Val.getValueType(), A); + createOperands(N, {Val}); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, SDValue N2, const SDNodeFlags Flags) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -5186,11 +5296,20 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2C && N2C->isNullValue()) return N1; break; + case ISD::MUL: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { + APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue(); + APInt N2CImm = N2C->getAPIntValue(); + return getVScale(DL, VT, MulImm * N2CImm); + } + break; case ISD::UDIV: case ISD::UREM: case ISD::MULHU: case ISD::MULHS: - case ISD::MUL: case ISD::SDIV: case ISD::SREM: case ISD::SMIN: @@ -5213,7 +5332,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == N2.getValueType() && N1.getValueType() == VT && "Binary operator types must match!"); - if (SDValue V = simplifyFPBinop(Opcode, N1, N2)) + if (SDValue V = simplifyFPBinop(Opcode, N1, N2, Flags)) return V; break; case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. @@ -5223,6 +5342,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Invalid FCOPYSIGN!"); break; case ISD::SHL: + if (N2C && (N1.getOpcode() == ISD::VSCALE) && Flags.hasNoSignedWrap()) { + APInt MulImm = cast<ConstantSDNode>(N1->getOperand(0))->getAPIntValue(); + APInt ShiftImm = N2C->getAPIntValue(); + return getVScale(DL, VT, MulImm << ShiftImm); + } + LLVM_FALLTHROUGH; case ISD::SRA: case ISD::SRL: if (SDValue V = simplifyShift(N1, N2)) @@ -5240,7 +5365,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // amounts. This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses // TLI.getShiftAmount(). 
- assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) && + assert(N2.getValueType().getScalarSizeInBits().getFixedSize() >= + Log2_32_Ceil(VT.getScalarSizeInBits().getFixedSize()) && "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to @@ -5281,7 +5407,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "SIGN_EXTEND_INREG type should be vector iff the operand " "type is vector!"); assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && + EVT.getVectorElementCount() == VT.getVectorElementCount()) && "Vector element counts must match in SIGN_EXTEND_INREG"); assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending @@ -5323,27 +5449,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N1.isUndef() || N2.isUndef()) return getUNDEF(VT); - // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF - if (N2C && N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF for fixed length + // vectors. For scalable vectors we will provide appropriate support for + // dealing with arbitrary indices. + if (N2C && N1.getValueType().isFixedLengthVector() && + N2C->getAPIntValue().uge(N1.getValueType().getVectorNumElements())) return getUNDEF(VT); // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is - // expanding copies of large vectors from registers. - if (N2C && - N1.getOpcode() == ISD::CONCAT_VECTORS && - N1.getNumOperands() > 0) { + // expanding copies of large vectors from registers. This only works for + // fixed length vectors, since we need to know the exact number of + // elements. + if (N2C && N1.getOperand(0).getValueType().isFixedLengthVector() && + N1.getOpcode() == ISD::CONCAT_VECTORS && N1.getNumOperands() > 0) { unsigned Factor = N1.getOperand(0).getValueType().getVectorNumElements(); return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(N2C->getZExtValue() / Factor), - getConstant(N2C->getZExtValue() % Factor, DL, - N2.getValueType())); + getVectorIdxConstant(N2C->getZExtValue() % Factor, DL)); } - // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is - // expanding large vector constants. - if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { - SDValue Elt = N1.getOperand(N2C->getZExtValue()); + // EXTRACT_VECTOR_ELT of BUILD_VECTOR or SPLAT_VECTOR is often formed while + // lowering is expanding large vector constants. + if (N2C && (N1.getOpcode() == ISD::BUILD_VECTOR || + N1.getOpcode() == ISD::SPLAT_VECTOR)) { + assert((N1.getOpcode() != ISD::BUILD_VECTOR || + N1.getValueType().isFixedLengthVector()) && + "BUILD_VECTOR used for scalable vectors"); + unsigned Index = + N1.getOpcode() == ISD::BUILD_VECTOR ? N2C->getZExtValue() : 0; + SDValue Elt = N1.getOperand(Index); if (VT != Elt.getValueType()) // If the vector element type is not legal, the BUILD_VECTOR operands @@ -5377,8 +5512,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // EXTRACT_VECTOR_ELT of v1iX EXTRACT_SUBVECTOR could be formed // when vector types are scalarized and v1iX is legal. - // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx) + // vextract (v1iX extract_subvector(vNiX, Idx)) -> vextract(vNiX,Idx). 
+ // Here we are completely ignoring the extract element index (N2), + // which is fine for fixed width vectors, since any index other than 0 + // is undefined anyway. However, this cannot be ignored for scalable + // vectors - in theory we could support this, but we don't want to do this + // without a profitability check. if (N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N1.getValueType().isFixedLengthVector() && N1.getValueType().getVectorNumElements() == 1) { return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N1.getOperand(1)); @@ -5406,50 +5547,48 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } break; case ISD::EXTRACT_SUBVECTOR: - if (VT.isSimple() && N1.getValueType().isSimple()) { - assert(VT.isVector() && N1.getValueType().isVector() && - "Extract subvector VTs must be a vectors!"); - assert(VT.getVectorElementType() == - N1.getValueType().getVectorElementType() && - "Extract subvector VTs must have the same element type!"); - assert(VT.getSimpleVT() <= N1.getSimpleValueType() && - "Extract subvector must be from larger vector to smaller vector!"); - - if (N2C) { - assert((VT.getVectorNumElements() + N2C->getZExtValue() - <= N1.getValueType().getVectorNumElements()) - && "Extract subvector overflow!"); - } - - // Trivial extraction. - if (VT.getSimpleVT() == N1.getSimpleValueType()) - return N1; - - // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. - if (N1.isUndef()) - return getUNDEF(VT); + EVT N1VT = N1.getValueType(); + assert(VT.isVector() && N1VT.isVector() && + "Extract subvector VTs must be vectors!"); + assert(VT.getVectorElementType() == N1VT.getVectorElementType() && + "Extract subvector VTs must have the same element type!"); + assert((VT.isFixedLengthVector() || N1VT.isScalableVector()) && + "Cannot extract a scalable vector from a fixed length vector!"); + assert((VT.isScalableVector() != N1VT.isScalableVector() || + VT.getVectorMinNumElements() <= N1VT.getVectorMinNumElements()) && + "Extract subvector must be from larger vector to smaller vector!"); + assert(N2C && "Extract subvector index must be a constant"); + assert((VT.isScalableVector() != N1VT.isScalableVector() || + (VT.getVectorMinNumElements() + N2C->getZExtValue()) <= + N1VT.getVectorMinNumElements()) && + "Extract subvector overflow!"); + + // Trivial extraction. + if (VT == N1VT) + return N1; - // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of - // the concat have the same type as the extract. - if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && - N1.getNumOperands() > 0 && - VT == N1.getOperand(0).getValueType()) { - unsigned Factor = VT.getVectorNumElements(); - return N1.getOperand(N2C->getZExtValue() / Factor); - } + // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. + if (N1.isUndef()) + return getUNDEF(VT); - // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created - // during shuffle legalization. - if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && - VT == N1.getOperand(1).getValueType()) - return N1.getOperand(1); + // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of + // the concat have the same type as the extract. + if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0 && VT == N1.getOperand(0).getValueType()) { + unsigned Factor = VT.getVectorMinNumElements(); + return N1.getOperand(N2C->getZExtValue() / Factor); } + + // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created + // during shuffle legalization. 
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && + VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); break; } // Perform trivial constant folding. - if (SDValue SV = - FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) + if (SDValue SV = FoldConstantArithmetic(Opcode, DL, VT, {N1, N2})) return SV; if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2)) @@ -5571,8 +5710,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "SETCC operands must have the same type!"); assert(VT.isVector() == N1.getValueType().isVector() && "SETCC type should be vector iff the operand type is vector!"); - assert((!VT.isVector() || - VT.getVectorNumElements() == N1.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || VT.getVectorElementCount() == + N1.getValueType().getVectorElementCount()) && "SETCC vector element counts must match!"); // Use FoldSetCC to simplify SETCC's. if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) @@ -5594,8 +5733,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, llvm_unreachable("should use getVectorShuffle constructor!"); case ISD::INSERT_VECTOR_ELT: { ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); - // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF - if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except + // for scalable vectors where we will generate appropriate code to + // deal with out-of-bounds cases correctly. + if (N3C && N1.getValueType().isFixedLengthVector() && + N3C->getZExtValue() >= N1.getValueType().getVectorNumElements()) return getUNDEF(VT); // Undefined index can be assumed out-of-bounds, so that's UNDEF too. @@ -5612,33 +5754,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Inserting undef into undef is still undef. if (N1.isUndef() && N2.isUndef()) return getUNDEF(VT); - SDValue Index = N3; - if (VT.isSimple() && N1.getValueType().isSimple() - && N2.getValueType().isSimple()) { - assert(VT.isVector() && N1.getValueType().isVector() && - N2.getValueType().isVector() && - "Insert subvector VTs must be a vectors"); - assert(VT == N1.getValueType() && - "Dest and insert subvector source types must match!"); - assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && - "Insert subvector must be from smaller vector to larger vector!"); - if (isa<ConstantSDNode>(Index)) { - assert((N2.getValueType().getVectorNumElements() + - cast<ConstantSDNode>(Index)->getZExtValue() - <= VT.getVectorNumElements()) - && "Insert subvector overflow!"); - } - // Trivial insertion. 
- if (VT.getSimpleVT() == N2.getSimpleValueType()) - return N2; + EVT N2VT = N2.getValueType(); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(VT.isVector() && N2VT.isVector() && + "Insert subvector VTs must be vectors!"); + assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) && + "Cannot insert a scalable vector into a fixed length vector!"); + assert((VT.isScalableVector() != N2VT.isScalableVector() || + VT.getVectorMinNumElements() >= N2VT.getVectorMinNumElements()) && + "Insert subvector must be from smaller vector to larger vector!"); + assert(isa<ConstantSDNode>(N3) && + "Insert subvector index must be constant"); + assert((VT.isScalableVector() != N2VT.isScalableVector() || + (N2VT.getVectorMinNumElements() + + cast<ConstantSDNode>(N3)->getZExtValue()) <= + VT.getVectorMinNumElements()) && + "Insert subvector overflow!"); + + // Trivial insertion. + if (VT == N2VT) + return N2; - // If this is an insert of an extracted vector into an undef vector, we - // can just use the input to the extract. - if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && - N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) - return N2.getOperand(0); - } + // If this is an insert of an extracted vector into an undef vector, we + // can just use the input to the extract. + if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR && + N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT) + return N2.getOperand(0); break; } case ISD::BITCAST: @@ -5867,7 +6010,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Alignment, + uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5891,37 +6034,38 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Alignment > SrcAlign) + MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); + if (!SrcAlign || Alignment > *SrcAlign) SrcAlign = Alignment; + assert(SrcAlign && "SrcAlign must be set"); ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); - + const MemOp Op = isZeroConstant + ? MemOp::Set(Size, DstAlignCanChange, Alignment, + /*IsZeroMemset*/ true, isVol) + : MemOp::Copy(Size, DstAlignCanChange, Alignment, + *SrcAlign, isVol, CopyFromConstant); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), - (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, - /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, - /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), + MemOps, Limit, Op, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); + Align NewAlign = DL.getABITypeAlign(Ty); // Don't promote to an alignment that would require dynamic stack // realignment. 
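The getMemcpyLoadsAndStores hunk above may promote a local stack object's alignment to the ABI alignment of the widest memop type, but halves the candidate back down while it would require dynamic stack realignment. A minimal standalone sketch of that clamp, with plain powers of two standing in for llvm::Align:

// Returns the alignment the destination ends up with. NaturalStackAlign is
// the largest alignment the stack provides without dynamic realignment.
unsigned promoteDestAlign(unsigned CurAlign, unsigned ABITypeAlign,
                          unsigned NaturalStackAlign) {
  unsigned NewAlign = ABITypeAlign;
  while (NewAlign > CurAlign && NewAlign > NaturalStackAlign)
    NewAlign /= 2; // alignments are powers of two, so halving stays valid
  return NewAlign > CurAlign ? NewAlign : CurAlign;
}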
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Alignment && - DL.exceedsNaturalStackAlignment(Align(NewAlign))) - NewAlign /= 2; + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign / 2; if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. - if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); Alignment = NewAlign; } @@ -5968,7 +6112,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (Value.getNode()) { Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); OutChains.push_back(Store); } } @@ -5991,12 +6135,13 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), SrcPtrInfo.getWithOffset(SrcOff), VT, - MinAlign(SrcAlign, SrcOff), SrcMMOFlags); + commonAlignment(*SrcAlign, SrcOff).value(), + SrcMMOFlags); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment.value(), MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6052,7 +6197,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, Align Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -6074,29 +6219,27 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + MaybeAlign SrcAlign = DAG.InferPtrAlign(Src); + if (!SrcAlign || Alignment > *SrcAlign) + SrcAlign = Alignment; + assert(SrcAlign && "SrcAlign must be set"); unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); - // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in - // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the - // correct code. - bool AllowOverlap = false; if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign, - /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, - AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), + MemOps, Limit, + MemOp::Copy(Size, DstAlignCanChange, Alignment, *SrcAlign, + /*IsVolatile*/ true), + DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); - unsigned NewAlign = (unsigned)DL.getABITypeAlignment(Ty); - if (NewAlign > Align) { + Align NewAlign = DL.getABITypeAlign(Ty); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. 
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -6118,9 +6261,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; - Value = - DAG.getLoad(VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), - SrcPtrInfo.getWithOffset(SrcOff), SrcAlign, SrcMMOFlags); + Value = DAG.getLoad( + VT, dl, Chain, DAG.getMemBasePlusOffset(Src, SrcOff, dl), + SrcPtrInfo.getWithOffset(SrcOff), SrcAlign->value(), SrcMMOFlags); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -6132,9 +6275,9 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Store; - Store = DAG.getStore(Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, MMOFlags); + Store = DAG.getStore( + Chain, dl, LoadValues[i], DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), MMOFlags); OutChains.push_back(Store); DstOff += VTSize; } @@ -6151,7 +6294,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, /// \param Dst Pointer to destination memory location. /// \param Src Value of byte to write into the memory. /// \param Size Number of bytes to write. -/// \param Align Alignment of the destination in bytes. +/// \param Alignment Alignment of the destination in bytes. /// \param isVol True if destination is volatile. /// \param DstPtrInfo IR information on the memory pointer. /// \returns New head in the control flow, if lowering was successful, empty @@ -6162,7 +6305,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, /// memory size. static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, bool isVol, + uint64_t Size, Align Alignment, bool isVol, MachinePointerInfo DstPtrInfo) { // Turn a memset of undef to nop. // FIXME: We need to honor volatile even is Src is undef. @@ -6183,21 +6326,19 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, bool IsZeroVal = isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); if (!TLI.findOptimalMemOpLowering( - MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, - (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true, - /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, - /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u, - MF.getFunction().getAttributes())) + MemOps, TLI.getMaxStoresPerMemset(OptSize), + MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), + DstPtrInfo.getAddrSpace(), ~0u, MF.getFunction().getAttributes())) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); - if (NewAlign > Align) { + Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. 
- if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) + if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -6235,7 +6376,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, + DstPtrInfo.getWithOffset(DstOff), Alignment.value(), isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; @@ -6256,12 +6397,10 @@ static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, } SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6270,9 +6409,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(),Align, - isVol, false, DstPtrInfo, SrcPtrInfo); + SDValue Result = getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6281,7 +6420,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemcpy( - *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + *this, dl, Chain, Dst, Src, Size, Alignment, isVol, AlwaysInline, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; @@ -6292,8 +6431,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, isVol, - true, DstPtrInfo, SrcPtrInfo); + ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, SrcPtrInfo); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -6372,12 +6511,10 @@ SDValue SelectionDAG::getAtomicMemcpy(SDValue Chain, const SDLoc &dl, } SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. 
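getMemcpy, getMemmove and getMemset all follow the cascade spelled out in the comments above: expand inline when the size is a known constant within target limits, then let the target emit custom code, and fall back to a libcall otherwise. A minimal standalone sketch of that control flow (the names and the std::optional result type are ours):

#include <functional>
#include <optional>

using Lowering = std::function<std::optional<int>()>;

std::optional<int> lowerMemOp(bool HasConstantSize, Lowering ExpandInline,
                              Lowering EmitTargetCode, Lowering EmitLibCall) {
  if (HasConstantSize)
    if (auto Result = ExpandInline()) // best: straight loads and stores
      return Result;
  if (auto Result = EmitTargetCode()) // next best: target-specific sequence
    return Result;
  return EmitLibCall();               // last resort
}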
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6386,10 +6523,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = - getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Align, isVol, - false, DstPtrInfo, SrcPtrInfo); + SDValue Result = getMemmoveLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, false, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6397,8 +6533,9 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, // Then check to see if we should lower the memmove with target-specific // code. If the target chooses to do this, this is the next best. if (TSI) { - SDValue Result = TSI->EmitTargetCodeForMemmove( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + SDValue Result = + TSI->EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, + Alignment, isVol, DstPtrInfo, SrcPtrInfo); if (Result.getNode()) return Result; } @@ -6476,11 +6613,9 @@ SDValue SelectionDAG::getAtomicMemmove(SDValue Chain, const SDLoc &dl, } SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue Src, SDValue Size, Align Alignment, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { - assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); - // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -6489,9 +6624,9 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, if (ConstantSize->isNullValue()) return Chain; - SDValue Result = - getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), - Align, isVol, DstPtrInfo); + SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Alignment, + isVol, DstPtrInfo); if (Result.getNode()) return Result; @@ -6501,7 +6636,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, // code. If the target chooses to do this, this is the next best. 
if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemset( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + *this, dl, Chain, Dst, Src, Size, Alignment, isVol, DstPtrInfo); if (Result.getNode()) return Result; } @@ -6662,11 +6797,8 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, - EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, + EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { - if (Align == 0) // Ensure that codegen never sees alignment 0 - Align = getEVTAlignment(MemVT); - if (!Size && MemVT.isScalableVector()) Size = MemoryLocation::UnknownSize; else if (!Size) @@ -6674,7 +6806,7 @@ SDValue SelectionDAG::getMemIntrinsicNode( MachineFunction &MF = getMachineFunction(); MachineMemOperand *MMO = - MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo); + MF.getMachineMemOperand(PtrInfo, Flags, Size, Alignment, AAInfo); return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); } @@ -6686,8 +6818,6 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, assert((Opcode == ISD::INTRINSIC_VOID || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::PREFETCH || - Opcode == ISD::LIFETIME_START || - Opcode == ISD::LIFETIME_END || ((int)Opcode <= std::numeric_limits<int>::max() && (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); @@ -6795,13 +6925,11 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment, + Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(MemVT); MMOFlags |= MachineMemOperand::MOLoad; assert((MMOFlags & MachineMemOperand::MOStore) == 0); @@ -6810,9 +6938,10 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, if (PtrInfo.V.isNull()) PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, MemVT.getStoreSize(), Alignment, AAInfo, Ranges); + MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, + Alignment, AAInfo, Ranges); return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } @@ -6867,7 +6996,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, - unsigned Alignment, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, const MDNode *Ranges) { SDValue Undef = getUNDEF(Ptr.getValueType()); @@ -6885,7 +7014,7 @@ SDValue SelectionDAG::getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, - unsigned Alignment, + MaybeAlign Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { SDValue Undef = 
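Because a scalable type's store size is not a compile-time constant, the MachineMemOperands built above are given MemoryLocation's "unknown size" marker instead of a byte count. A minimal standalone sketch of that choice (the all-ones constant mirrors MemoryLocation::UnknownSize without pulling in LLVM headers):

#include <cstdint>

constexpr uint64_t UnknownSize = ~UINT64_C(0);

uint64_t sizeOrUnknown(bool IsScalable, uint64_t FixedStoreBytes) {
  return IsScalable ? UnknownSize : FixedStoreBytes;
}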
getUNDEF(Ptr.getValueType()); @@ -6918,12 +7047,10 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl, SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - unsigned Alignment, + Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(Val.getValueType()); MMOFlags |= MachineMemOperand::MOStore; assert((MMOFlags & MachineMemOperand::MOLoad) == 0); @@ -6932,8 +7059,10 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); MachineFunction &MF = getMachineFunction(); - MachineMemOperand *MMO = MF.getMachineMemOperand( - PtrInfo, MMOFlags, Val.getValueType().getStoreSize(), Alignment, AAInfo); + uint64_t Size = + MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); return getStore(Chain, dl, Val, Ptr, MMO); } @@ -6969,13 +7098,11 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, - EVT SVT, unsigned Alignment, + EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); - if (Alignment == 0) // Ensure that codegen never sees alignment 0 - Alignment = getEVTAlignment(SVT); MMOFlags |= MachineMemOperand::MOStore; assert((MMOFlags & MachineMemOperand::MOLoad) == 0); @@ -7288,9 +7415,24 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) { return SDValue(); } -// TODO: Use fast-math-flags to enable more simplifications. -SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) { +SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, + SDNodeFlags Flags) { + // If this operation has 'nnan' or 'ninf' and at least 1 disallowed operand + // (an undef operand can be chosen to be Nan/Inf), then the result of this + // operation is poison. That result can be relaxed to undef. 
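The simplifyFPBinop change above folds a binary FP op to undef when 'nnan' sees a NaN (or undef) operand, or 'ninf' sees an infinity (or undef) operand, since the result would be poison. A minimal standalone sketch of the predicate, with doubles standing in for the constant/splat operands:

#include <cmath>

bool foldsFPBinopToUndef(bool HasNoNaNs, bool HasNoInfs, double X, double Y,
                         bool XIsUndef, bool YIsUndef) {
  bool HasNan = std::isnan(X) || std::isnan(Y);
  bool HasInf = std::isinf(X) || std::isinf(Y);
  if (HasNoNaNs && (HasNan || XIsUndef || YIsUndef))
    return true; // e.g. fadd nnan %x, NaN
  if (HasNoInfs && (HasInf || XIsUndef || YIsUndef))
    return true; // e.g. fmul ninf inf, %y
  return false;
}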
+ ConstantFPSDNode *XC = isConstOrConstSplatFP(X, /* AllowUndefs */ true); ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true); + bool HasNan = (XC && XC->getValueAPF().isNaN()) || + (YC && YC->getValueAPF().isNaN()); + bool HasInf = (XC && XC->getValueAPF().isInfinity()) || + (YC && YC->getValueAPF().isInfinity()); + + if (Flags.hasNoNaNs() && (HasNan || X.isUndef() || Y.isUndef())) + return getUNDEF(X.getValueType()); + + if (Flags.hasNoInfs() && (HasInf || X.isUndef() || Y.isUndef())) + return getUNDEF(X.getValueType()); + if (!YC) return SDValue(); @@ -7394,6 +7536,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, createOperands(N, Ops); } + N->setFlags(Flags); InsertNode(N); SDValue V(N, 0); NewSDValueDbgMsg(V, "Creating new node: ", this); @@ -7406,7 +7549,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, - ArrayRef<SDValue> Ops) { + ArrayRef<SDValue> Ops, const SDNodeFlags Flags) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops); @@ -7481,6 +7624,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTList); createOperands(N, Ops); } + + N->setFlags(Flags); InsertNode(N); SDValue V(N, 0); NewSDValueDbgMsg(V, "Creating new node: ", this); @@ -7919,7 +8064,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { switch (OrigOpc) { default: llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!"); -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::DAGN; break; #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case ISD::STRICT_##DAGN: NewOpc = ISD::SETCC; break; @@ -9196,9 +9341,8 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) return SDValue(); BinOp = (ISD::NodeType)CandidateBinOp; - return getNode( - ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, - getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + return getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getVectorIdxConstant(0, SDLoc(Op))); }; // At each stage, we're looking for something that looks like: @@ -9246,6 +9390,28 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, PrevOp = Op; } + // Handle subvector reductions, which tend to appear after the shuffle + // reduction stages. + while (Op.getOpcode() == CandidateBinOp) { + unsigned NumElts = Op.getValueType().getVectorNumElements(); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (Op0.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Op1.getOpcode() != ISD::EXTRACT_SUBVECTOR || + Op0.getOperand(0) != Op1.getOperand(0)) + break; + SDValue Src = Op0.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + if (NumSrcElts != (2 * NumElts)) + break; + if (!(Op0.getConstantOperandAPInt(1) == 0 && + Op1.getConstantOperandAPInt(1) == NumElts) && + !(Op1.getConstantOperandAPInt(1) == 0 && + Op0.getConstantOperandAPInt(1) == NumElts)) + break; + Op = Src; + } + BinOp = (ISD::NodeType)CandidateBinOp; return Op; } @@ -9276,9 +9442,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { if (OperandVT.isVector()) { // A vector operand; extract a single element. 
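The loop added to matchBinOpReduction above also recognizes the "split in half and combine" stages done with EXTRACT_SUBVECTOR: a binop whose operands extract the low and high halves (in either order) of one source with twice as many elements is replaced by that source, and matching continues. A minimal standalone sketch of the index test:

// NumElts is the binop's element count; NumSrcElts, Idx0 and Idx1 describe the
// two extract_subvector operands taken from the same source vector.
bool isHalfAndHalfExtract(unsigned NumElts, unsigned NumSrcElts, unsigned Idx0,
                          unsigned Idx1) {
  if (NumSrcElts != 2 * NumElts)
    return false;
  return (Idx0 == 0 && Idx1 == NumElts) || (Idx1 == 0 && Idx0 == NumElts);
}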
EVT OperandEltVT = OperandVT.getVectorElementType(); - Operands[j] = - getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout()))); + Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, + Operand, getVectorIdxConstant(i, dl)); } else { // A scalar operand; just use it as is. Operands[j] = Operand; @@ -9395,9 +9560,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; } -/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if -/// it cannot be inferred. -unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { +/// InferPtrAlignment - Infer alignment of a load / store address. Return None +/// if it cannot be inferred. +MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV = nullptr; int64_t GVOffset = 0; @@ -9406,9 +9571,8 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { KnownBits Known(PtrWidth); llvm::computeKnownBits(GV, Known, getDataLayout()); unsigned AlignBits = Known.countMinTrailingZeros(); - unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; - if (Align) - return MinAlign(Align, GVOffset); + if (AlignBits) + return commonAlignment(Align(1ull << std::min(31U, AlignBits)), GVOffset); } // If this is a direct reference to a stack slot, use information about the @@ -9426,12 +9590,10 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { if (FrameIdx != INT_MIN) { const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo(); - unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), - FrameOffset); - return FIInfoAlign; + return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); } - return 0; + return None; } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type @@ -9447,20 +9609,58 @@ std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { return std::make_pair(LoVT, HiVT); } +/// GetDependentSplitDestVTs - Compute the VTs needed for the low/hi parts of a +/// type, dependent on an enveloping VT that has been split into two identical +/// pieces. Sets the HiIsEmpty flag when hi type has zero storage size. +std::pair<EVT, EVT> +SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, + bool *HiIsEmpty) const { + EVT EltTp = VT.getVectorElementType(); + bool IsScalable = VT.isScalableVector(); + // Examples: + // custom VL=8 with enveloping VL=8/8 yields 8/0 (hi empty) + // custom VL=9 with enveloping VL=8/8 yields 8/1 + // custom VL=10 with enveloping VL=8/8 yields 8/2 + // etc. + unsigned VTNumElts = VT.getVectorNumElements(); + unsigned EnvNumElts = EnvVT.getVectorNumElements(); + EVT LoVT, HiVT; + if (VTNumElts > EnvNumElts) { + LoVT = EnvVT; + HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts, + IsScalable); + *HiIsEmpty = false; + } else { + // Flag that hi type has zero storage size, but return split envelop type + // (this would be easier if vector types with zero elements were allowed). + LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts, IsScalable); + HiVT = EnvVT; + *HiIsEmpty = true; + } + return std::make_pair(LoVT, HiVT); +} + /// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the /// low/high part. 
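GetDependentSplitDestVTs, added above, splits a custom-length vector against an envelope that was already split into two EnvNumElts-element halves; the worked examples in its comment (VL=9 against 8/8 yields 8/1, VL=8 yields 8/0 with an empty hi part) reduce to the following arithmetic, shown here as a standalone sketch:

struct DependentSplit {
  unsigned LoElts, HiElts;
  bool HiIsEmpty; // hi part has zero storage; the envelope type stands in
};

DependentSplit dependentSplitCounts(unsigned VTNumElts, unsigned EnvNumElts) {
  if (VTNumElts > EnvNumElts)
    return {EnvNumElts, VTNumElts - EnvNumElts, false}; // e.g. 9 -> 8 and 1
  return {VTNumElts, EnvNumElts, true};                 // e.g. 8 -> 8 and "0"
}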
std::pair<SDValue, SDValue> SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT) { - assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= - N.getValueType().getVectorNumElements() && + assert(LoVT.isScalableVector() == HiVT.isScalableVector() && + LoVT.isScalableVector() == N.getValueType().isScalableVector() && + "Splitting vector with an invalid mixture of fixed and scalable " + "vector types"); + assert(LoVT.getVectorMinNumElements() + HiVT.getVectorMinNumElements() <= + N.getValueType().getVectorMinNumElements() && "More vector elements requested than available!"); SDValue Lo, Hi; - Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, - getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); + Lo = + getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, getVectorIdxConstant(0, DL)); + // For scalable vectors it is safe to use LoVT.getVectorMinNumElements() + // (rather than having to use ElementCount), because EXTRACT_SUBVECTOR scales + // IDX with the runtime scaling factor of the result vector type. For + // fixed-width result vectors, that runtime scaling factor is 1. Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, - getConstant(LoVT.getVectorNumElements(), DL, - TLI->getVectorIdxTy(getDataLayout()))); + getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); return std::make_pair(Lo, Hi); } @@ -9470,22 +9670,22 @@ SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) { EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), NextPowerOf2(VT.getVectorNumElements())); return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N, - getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); + getVectorIdxConstant(0, DL)); } void SelectionDAG::ExtractVectorElements(SDValue Op, SmallVectorImpl<SDValue> &Args, - unsigned Start, unsigned Count) { + unsigned Start, unsigned Count, + EVT EltVT) { EVT VT = Op.getValueType(); if (Count == 0) Count = VT.getVectorNumElements(); - - EVT EltVT = VT.getVectorElementType(); - EVT IdxTy = TLI->getVectorIdxTy(getDataLayout()); + if (EltVT == EVT()) + EltVT = VT.getVectorElementType(); SDLoc SL(Op); for (unsigned i = Start, e = Start + Count; i != e; ++i) { - Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, - Op, getConstant(i, SL, IdxTy))); + Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Op, + getVectorIdxConstant(i, SL))); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 728d963a916f5..1d596c89c9113 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -69,7 +69,6 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" @@ -136,6 +135,11 @@ using namespace SwitchCG; /// some float libcalls (6, 8 or 12 bits). 
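SplitVector above now handles scalable vectors because the hi half is extracted at index LoVT.getVectorMinNumElements(): EXTRACT_SUBVECTOR scales a constant index by the runtime vscale of the result type, so only the minimum counts matter. A minimal standalone sketch of the preconditions and the resulting hi index:

#include <cassert>

// LoMin/HiMin/SrcMin are minimum element counts; returns the constant index at
// which the hi half is extracted (scaled by vscale at runtime when scalable).
unsigned splitVectorHiIndex(bool LoScalable, bool HiScalable, bool SrcScalable,
                            unsigned LoMin, unsigned HiMin, unsigned SrcMin) {
  assert(LoScalable == HiScalable && LoScalable == SrcScalable &&
         "Splitting vector with an invalid mixture of fixed and scalable "
         "vector types");
  assert(LoMin + HiMin <= SrcMin &&
         "More vector elements requested than available!");
  return LoMin;
}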
static unsigned LimitFloatPrecision; +static cl::opt<bool> + InsertAssertAlign("insert-assert-align", cl::init(true), + cl::desc("Insert the experimental `assertalign` node."), + cl::ReallyHidden); + static cl::opt<unsigned, true> LimitFPPrecision("limit-float-precision", cl::desc("Generate low-precision inline sequences " @@ -206,12 +210,17 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, MVT PartVT, EVT ValueVT, const Value *V, Optional<CallingConv::ID> CC = None, Optional<ISD::NodeType> AssertOp = None) { + // Let the target assemble the parts if it wants to + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts, + PartVT, ValueVT, CC)) + return Val; + if (ValueVT.isVector()) return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V, CC); assert(NumParts > 0 && "No parts to assemble!"); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Val = Parts[0]; if (NumParts > 1) { @@ -347,7 +356,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, const char *AsmError = ", possible invalid constraint for vector type"; if (const CallInst *CI = dyn_cast<CallInst>(I)) - if (isa<InlineAsm>(CI->getCalledValue())) + if (CI->isInlineAsm()) return Ctx.emitError(I, ErrMsg + AsmError); return Ctx.emitError(I, ErrMsg); @@ -415,10 +424,13 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the // intermediate operands. EVT BuiltVectorTy = - EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(), - (IntermediateVT.isVector() - ? IntermediateVT.getVectorNumElements() * NumParts - : NumIntermediates)); + IntermediateVT.isVector() + ? EVT::getVectorVT( + *DAG.getContext(), IntermediateVT.getScalarType(), + IntermediateVT.getVectorElementCount() * NumParts) + : EVT::getVectorVT(*DAG.getContext(), + IntermediateVT.getScalarType(), + NumIntermediates); Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL, BuiltVectorTy, Ops); @@ -436,18 +448,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { - assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + assert((PartEVT.getVectorElementCount().Min > + ValueVT.getVectorElementCount().Min) && + (PartEVT.getVectorElementCount().Scalable == + ValueVT.getVectorElementCount().Scalable) && "Cannot narrow, it would be a lossy transformation"); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getVectorIdxConstant(0, DL)); } // Vector/Vector bitcast. 
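When reassembling a vector value from register parts above, the concatenated type's element count is the per-part element count times the number of parts if each intermediate is itself a vector, and simply the number of intermediates otherwise. A minimal standalone sketch of that arithmetic:

unsigned builtVectorMinNumElts(bool IntermediateIsVector,
                               unsigned PerPartMinElts, unsigned NumParts,
                               unsigned NumIntermediates) {
  return IntermediateIsVector ? PerPartMinElts * NumParts : NumIntermediates;
}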
if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() && "Cannot handle this kind of promotion"); // Promoted vector extract return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); @@ -472,9 +486,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(), Elts); Val = DAG.getBitcast(WiderVecType, Val); - return DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getVectorIdxConstant(0, DL)); } diagnosePossiblyInvalidConstraint( @@ -484,9 +497,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Handle cases such as i8 -> <1 x i1> EVT ValueSVT = ValueVT.getVectorElementType(); - if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) - Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) - : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) { + if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits()) + Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val); + else + Val = ValueVT.isFloatingPoint() + ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) + : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + } return DAG.getBuildVector(ValueVT, DL, Val); } @@ -504,6 +522,11 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, const Value *V, Optional<CallingConv::ID> CallConv = None, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + // Let the target split the parts if it wants to + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT, + CallConv)) + return; EVT ValueVT = Val.getValueType(); // Handle the vector case separately. @@ -633,7 +656,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val, const SDLoc &DL, EVT PartVT) { - if (!PartVT.isVector()) + if (!PartVT.isFixedLengthVector()) return SDValue(); EVT ValueVT = Val.getValueType(); @@ -679,16 +702,16 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, Val = Widened; } else if (PartVT.isVector() && PartEVT.getVectorElementType().bitsGE( - ValueVT.getVectorElementType()) && - PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + ValueVT.getVectorElementType()) && + PartEVT.getVectorElementCount() == + ValueVT.getVectorElementCount()) { // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); } else { if (ValueVT.getVectorNumElements() == 1) { - Val = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getVectorIdxConstant(0, DL)); } else { assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() && "lossy conversion of vector to scalar type"); @@ -723,15 +746,18 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - unsigned IntermediateNumElts = IntermediateVT.isVector() ? 
- IntermediateVT.getVectorNumElements() : 1; + assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() && + "Mixing scalable and fixed vectors when copying in parts"); - // Convert the vector to the appropriate type if necessary. - unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts; + ElementCount DestEltCnt; + + if (IntermediateVT.isVector()) + DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates; + else + DestEltCnt = ElementCount(NumIntermediates, false); EVT BuiltVectorTy = EVT::getVectorVT( - *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts); - MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + *DAG.getContext(), IntermediateVT.getScalarType(), DestEltCnt); if (ValueVT != BuiltVectorTy) { if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) Val = Widened; @@ -743,12 +769,15 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SmallVector<SDValue, 8> Ops(NumIntermediates); for (unsigned i = 0; i != NumIntermediates; ++i) { if (IntermediateVT.isVector()) { - Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getConstant(i * IntermediateNumElts, DL, IdxVT)); + // This does something sensible for scalable vectors - see the + // definition of EXTRACT_SUBVECTOR for further details. + unsigned IntermediateNumElts = IntermediateVT.getVectorMinNumElements(); + Ops[i] = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, + DAG.getVectorIdxConstant(i * IntermediateNumElts, DL)); } else { - Ops[i] = DAG.getNode( - ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, - DAG.getConstant(i, DL, IdxVT)); + Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, + DAG.getVectorIdxConstant(i, DL)); } } @@ -1112,32 +1141,26 @@ void SelectionDAGBuilder::visit(const Instruction &I) { visit(I.getOpcode(), I); if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) { - // Propagate the fast-math-flags of this IR instruction to the DAG node that - // maps to this instruction. - // TODO: We could handle all flags (nsw, etc) here. - // TODO: If an IR instruction maps to >1 node, only the final node will have - // flags set. - if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags IncomingFlags; - IncomingFlags.copyFMF(*FPMO); - if (!Node->getFlags().isDefined()) - Node->setFlags(IncomingFlags); - else - Node->intersectFlagsWith(IncomingFlags); - } - } - // Constrained FP intrinsics with fpexcept.ignore should also get - // the NoFPExcept flag. - if (auto *FPI = dyn_cast<ConstrainedFPIntrinsic>(&I)) - if (FPI->getExceptionBehavior() == fp::ExceptionBehavior::ebIgnore) + // ConstrainedFPIntrinsics handle their own FMF. + if (!isa<ConstrainedFPIntrinsic>(&I)) { + // Propagate the fast-math-flags of this IR instruction to the DAG node that + // maps to this instruction. + // TODO: We could handle all flags (nsw, etc) here. + // TODO: If an IR instruction maps to >1 node, only the final node will have + // flags set. 
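DestEltCnt above is computed with ElementCount arithmetic: an element count is a (minimum count, scalable) pair, and multiplying by the number of intermediates scales only the minimum while preserving scalability. A minimal standalone sketch (the struct is ours, not LLVM's ElementCount):

struct EC {
  unsigned Min;  // minimum number of elements
  bool Scalable; // true if multiplied by vscale at runtime
};

EC mulElementCount(EC PerIntermediate, unsigned NumIntermediates) {
  return {PerIntermediate.Min * NumIntermediates, PerIntermediate.Scalable};
}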
if (SDNode *Node = getNodeForIRValue(&I)) { - SDNodeFlags Flags = Node->getFlags(); - Flags.setNoFPExcept(true); - Node->setFlags(Flags); + SDNodeFlags IncomingFlags; + IncomingFlags.copyFMF(*FPMO); + if (!Node->getFlags().isDefined()) + Node->setFlags(IncomingFlags); + else + Node->intersectFlagsWith(IncomingFlags); } + } + } if (!I.isTerminator() && !HasTailCall && - !isStatepoint(&I)) // statepoints handle their exports internally + !isa<GCStatepointInst>(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); CurInst = nullptr; @@ -1399,11 +1422,11 @@ void SelectionDAGBuilder::resolveOrClearDbgInfo() { /// getCopyFromRegs - If there was virtual register allocated for the value V /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { - DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, Register>::iterator It = FuncInfo.ValueMap.find(V); SDValue Result; if (It != FuncInfo.ValueMap.end()) { - unsigned InReg = It->second; + Register InReg = It->second; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Ty, @@ -1437,12 +1460,6 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return Val; } -// Return true if SDValue exists for the given Value -bool SelectionDAGBuilder::findValue(const Value *V) const { - return (NodeMap.find(V) != NodeMap.end()) || - (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); -} - /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { @@ -1486,6 +1503,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { TLI.getPointerTy(DAG.getDataLayout(), AS)); } + if (match(C, m_VScale(DAG.getDataLayout()))) + return DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1)); + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); @@ -1558,16 +1578,17 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return DAG.getBlockAddress(BA, VT); VectorType *VecTy = cast<VectorType>(V->getType()); - unsigned NumElements = VecTy->getNumElements(); // Now that we know the number and type of the elements, get that number of // elements into the Ops array based on what kind of constant it is. - SmallVector<SDValue, 16> Ops; if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { + SmallVector<SDValue, 16> Ops; + unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements(); for (unsigned i = 0; i != NumElements; ++i) Ops.push_back(getValue(CV->getOperand(i))); - } else { - assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + } else if (isa<ConstantAggregateZero>(C)) { EVT EltVT = TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); @@ -1576,11 +1597,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); else Op = DAG.getConstant(0, getCurSDLoc(), EltVT); - Ops.assign(NumElements, Op); - } - // Create a BUILD_VECTOR node. 
- return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + if (isa<ScalableVectorType>(VecTy)) + return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op); + else { + SmallVector<SDValue, 16> Ops; + Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op); + return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + } + } + llvm_unreachable("Unknown vector constant"); } // If this is a static alloca, generate it as the frameindex instead of @@ -1603,6 +1629,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } + if (const MetadataAsValue *MD = dyn_cast<MetadataAsValue>(V)) { + return DAG.getMDNode(cast<MDNode>(MD->getMetadata())); + } llvm_unreachable("Can't get register for value!"); } @@ -1611,17 +1640,12 @@ void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; bool IsCoreCLR = Pers == EHPersonality::CoreCLR; bool IsSEH = isAsynchronousEHPersonality(Pers); - bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX; MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; if (!IsSEH) CatchPadMBB->setIsEHScopeEntry(); // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. if (IsMSVCCXX || IsCoreCLR) CatchPadMBB->setIsEHFuncletEntry(); - // Wasm does not need catchpads anymore - if (!IsWasmCXX) - DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, - getControlRoot())); } void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { @@ -1835,6 +1859,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { unsigned NumValues = ValueVTs.size(); SmallVector<SDValue, 4> Chains(NumValues); + Align BaseAlign = DL.getPrefTypeAlign(I.getOperand(0)->getType()); for (unsigned i = 0; i != NumValues; ++i) { // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. @@ -1843,9 +1868,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); - Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, + Chains[i] = DAG.getStore( + Chain, getCurSDLoc(), Val, // FIXME: better loc info would be nice. - Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); + Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), + commonAlignment(BaseAlign, Offsets[i])); } Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), @@ -1964,7 +1991,7 @@ void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { if (V->getType()->isEmptyTy()) return; - DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, Register>::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { assert(!V->use_empty() && "Unused value assigned virtual registers!"); CopyValueToVirtualRegister(V, VMI->second); @@ -2277,7 +2304,9 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is a series of conditions that are or'd or and'd together, emit // this as a sequence of branches instead of setcc's with and/or operations. - // As long as jumps are not expensive, this should improve performance. + // As long as jumps are not expensive (exceptions for multi-use logic ops, + // unpredictable branches, and vector extracts because those jumps are likely + // expensive for any target), this should improve performance. 
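The visitRet hunk above gives each per-part store the alignment that still holds Offsets[i] bytes past a BaseAlign-aligned base, which is what commonAlignment computes; that value is the lowest set bit of (BaseAlign | Offset). A minimal standalone sketch with plain integers:

#include <cstdint>

// e.g. base align 16 at offset 4 -> 4; offset 0 -> 16; base 8 at offset 12 -> 4.
uint64_t commonAlign(uint64_t BaseAlign, uint64_t Offset) {
  uint64_t Bits = BaseAlign | Offset;
  return Bits & (~Bits + 1); // isolate the lowest set bit
}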
// For example, instead of something like: // cmp A, B // C = seteq @@ -2292,9 +2321,12 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); + Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && !I.hasMetadata(LLVMContext::MD_unpredictable) && - (Opcode == Instruction::And || Opcode == Instruction::Or)) { + (Opcode == Instruction::And || Opcode == Instruction::Or) && + !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) && + match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, getEdgeProbability(BrMBB, Succ0MBB), @@ -2516,7 +2548,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL, auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF.getMachineMemOperand( - MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy)); + MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlign(PtrTy)); DAG.setNodeMemRefs(Node, {MemRef}); } if (PtrTy != PtrMemTy) @@ -2597,17 +2629,13 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, MachineMemOperand::MOVolatile); } - // Perform the comparison via a subtract/getsetcc. - EVT VT = Guard.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal); - + // Perform the comparison via a getsetcc. SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); + Guard.getValueType()), + Guard, GuardVal, ISD::SETNE); - // If the sub is not 0, then we know the guard/stackslot do not equal, so - // branch to failure MBB. + // If the guard/stackslot do not equal, branch to failure MBB. SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, MVT::Other, GuardVal.getOperand(0), Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); @@ -2640,6 +2668,11 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { // Passing 'true' for doesNotReturn above won't generate the trap for us. if (TM.getTargetTriple().isPS4CPU()) Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); + // WebAssembly needs an unreachable instruction after a non-returning call, + // because the function return type can be different from __stack_chk_fail's + // return type (void). + if (TM.getTargetTriple().isWasm()) + Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain); DAG.setRoot(Chain); } @@ -2778,14 +2811,16 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. 
assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, + LLVMContext::OB_gc_transition, + LLVMContext::OB_gc_live, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget}) && "Cannot lower invokes with arbitrary operand bundles yet!"); - const Value *Callee(I.getCalledValue()); + const Value *Callee(I.getCalledOperand()); const Function *Fn = dyn_cast<Function>(Callee); if (isa<InlineAsm>(Callee)) - visitInlineAsm(&I); + visitInlineAsm(I); else if (Fn && Fn->isIntrinsic()) { switch (Fn->getIntrinsicID()) { default: @@ -2795,10 +2830,10 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { break; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I, EHPadBB); + visitPatchpoint(I, EHPadBB); break; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); + LowerStatepoint(cast<GCStatepointInst>(I), EHPadBB); break; case Intrinsic::wasm_rethrow_in_catch: { // This is usually done in visitTargetIntrinsic, but this intrinsic is @@ -2822,14 +2857,14 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { // with deopt state. LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB); } else { - LowerCallTo(&I, getValue(Callee), false, EHPadBB); + LowerCallTo(I, getValue(Callee), false, EHPadBB); } // If the value of the invoke is used outside of its defining block, make it // available as a virtual register. // We already took care of the exported value for the statepoint instruction // during call to the LowerStatepoint. - if (!isStatepoint(I)) { + if (!isa<GCStatepointInst>(I)) { CopyToExportRegsIfNeeded(&I); } @@ -2862,18 +2897,19 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) && "Cannot lower callbrs with arbitrary operand bundles yet!"); - assert(isa<InlineAsm>(I.getCalledValue()) && - "Only know how to handle inlineasm callbr"); - visitInlineAsm(&I); + assert(I.isInlineAsm() && "Only know how to handle inlineasm callbr"); + visitInlineAsm(I); + CopyToExportRegsIfNeeded(&I); // Retrieve successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; // Update successor info. - addSuccessorWithProb(CallBrMBB, Return); + addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)]; - addSuccessorWithProb(CallBrMBB, Target); + addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + Target->setIsInlineAsmBrIndirectTarget(); } CallBrMBB->normalizeSuccProbs(); @@ -3003,133 +3039,6 @@ void SelectionDAGBuilder::visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } -/// Checks if the given instruction performs a vector reduction, in which case -/// we have the freedom to alter the elements in the result as long as the -/// reduction of them stays unchanged. 
-static bool isVectorReductionOp(const User *I) { - const Instruction *Inst = dyn_cast<Instruction>(I); - if (!Inst || !Inst->getType()->isVectorTy()) - return false; - - auto OpCode = Inst->getOpcode(); - switch (OpCode) { - case Instruction::Add: - case Instruction::Mul: - case Instruction::And: - case Instruction::Or: - case Instruction::Xor: - break; - case Instruction::FAdd: - case Instruction::FMul: - if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (FPOp->getFastMathFlags().isFast()) - break; - LLVM_FALLTHROUGH; - default: - return false; - } - - unsigned ElemNum = Inst->getType()->getVectorNumElements(); - // Ensure the reduction size is a power of 2. - if (!isPowerOf2_32(ElemNum)) - return false; - - unsigned ElemNumToReduce = ElemNum; - - // Do DFS search on the def-use chain from the given instruction. We only - // allow four kinds of operations during the search until we reach the - // instruction that extracts the first element from the vector: - // - // 1. The reduction operation of the same opcode as the given instruction. - // - // 2. PHI node. - // - // 3. ShuffleVector instruction together with a reduction operation that - // does a partial reduction. - // - // 4. ExtractElement that extracts the first element from the vector, and we - // stop searching the def-use chain here. - // - // 3 & 4 above perform a reduction on all elements of the vector. We push defs - // from 1-3 to the stack to continue the DFS. The given instruction is not - // a reduction operation if we meet any other instructions other than those - // listed above. - - SmallVector<const User *, 16> UsersToVisit{Inst}; - SmallPtrSet<const User *, 16> Visited; - bool ReduxExtracted = false; - - while (!UsersToVisit.empty()) { - auto User = UsersToVisit.back(); - UsersToVisit.pop_back(); - if (!Visited.insert(User).second) - continue; - - for (const auto *U : User->users()) { - auto Inst = dyn_cast<Instruction>(U); - if (!Inst) - return false; - - if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) { - if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst)) - if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast()) - return false; - UsersToVisit.push_back(U); - } else if (const ShuffleVectorInst *ShufInst = - dyn_cast<ShuffleVectorInst>(U)) { - // Detect the following pattern: A ShuffleVector instruction together - // with a reduction that do partial reduction on the first and second - // ElemNumToReduce / 2 elements, and store the result in - // ElemNumToReduce / 2 elements in another vector. - - unsigned ResultElements = ShufInst->getType()->getVectorNumElements(); - if (ResultElements < ElemNum) - return false; - - if (ElemNumToReduce == 1) - return false; - if (!isa<UndefValue>(U->getOperand(1))) - return false; - for (unsigned i = 0; i < ElemNumToReduce / 2; ++i) - if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2)) - return false; - for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i) - if (ShufInst->getMaskValue(i) != -1) - return false; - - // There is only one user of this ShuffleVector instruction, which - // must be a reduction operation. - if (!U->hasOneUse()) - return false; - - auto U2 = dyn_cast<Instruction>(*U->user_begin()); - if (!U2 || U2->getOpcode() != OpCode) - return false; - - // Check operands of the reduction operation. 
- if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) || - (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) { - UsersToVisit.push_back(U2); - ElemNumToReduce /= 2; - } else - return false; - } else if (isa<ExtractElementInst>(U)) { - // At this moment we should have reduced all elements in the vector. - if (ElemNumToReduce != 1) - return false; - - const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1)); - if (!Val || !Val->isZero()) - return false; - - ReduxExtracted = true; - } else - return false; - } - } - return ReduxExtracted; -} - void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; @@ -3148,17 +3057,6 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) { if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) { Flags.setExact(ExactOp->isExact()); } - if (isVectorReductionOp(&I)) { - Flags.setVectorReduction(true); - LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n"); - - // If no flags are set we will propagate the incoming flags, if any flags - // are set, we will intersect them with the incoming flag and so we need to - // copy the FMF flags here. - if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) { - Flags.copyFMF(*FPOp); - } - } SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -3296,9 +3194,9 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SDValue Cond = getValue(I.getOperand(0)); SDValue LHSVal = getValue(I.getOperand(1)); SDValue RHSVal = getValue(I.getOperand(2)); - auto BaseOps = {Cond}; - ISD::NodeType OpCode = Cond.getValueType().isVector() ? - ISD::VSELECT : ISD::SELECT; + SmallVector<SDValue, 1> BaseOps(1, Cond); + ISD::NodeType OpCode = + Cond.getValueType().isVector() ? ISD::VSELECT : ISD::SELECT; bool IsUnaryAbs = false; @@ -3381,13 +3279,13 @@ void SelectionDAGBuilder::visitSelect(const User &I) { OpCode = Opc; LHSVal = getValue(LHS); RHSVal = getValue(RHS); - BaseOps = {}; + BaseOps.clear(); } if (IsUnaryAbs) { OpCode = Opc; LHSVal = getValue(LHS); - BaseOps = {}; + BaseOps.clear(); } } @@ -3577,19 +3475,22 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); - Constant *MaskV = cast<Constant>(I.getOperand(2)); + ArrayRef<int> Mask; + if (auto *SVI = dyn_cast<ShuffleVectorInst>(&I)) + Mask = SVI->getShuffleMask(); + else + Mask = cast<ConstantExpr>(I).getShuffleMask(); SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); - unsigned SrcNumElts = SrcVT.getVectorNumElements(); - if (MaskV->isNullValue() && VT.isScalableVector()) { + if (all_of(Mask, [](int Elem) { return Elem == 0; }) && + VT.isScalableVector()) { // Canonical splat form of first element of first input vector. - SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - SrcVT.getScalarType(), Src1, - DAG.getConstant(0, DL, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue FirstElt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT.getScalarType(), Src1, + DAG.getVectorIdxConstant(0, DL)); setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); return; } @@ -3599,8 +3500,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // for targets that support a SPLAT_VECTOR for non-scalable vector types. 
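The scalable-vector path above canonicalizes an all-zero shuffle mask into an extract of lane 0 followed by SPLAT_VECTOR. A fixed-width scalar model of that semantics (names are illustrative, not part of the patch):

#include <vector>

// shufflevector with an all-zero mask is a broadcast of Src1[0].
std::vector<int> splatOfFirstLane(const std::vector<int> &Src1,
                                  unsigned ResultLanes) {
  return std::vector<int>(ResultLanes, Src1.at(0)); // extract lane 0, splat it
}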
assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); - SmallVector<int, 8> Mask; - ShuffleVectorInst::getShuffleMask(MaskV, Mask); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); unsigned MaskNumElts = Mask.size(); if (SrcNumElts == MaskNumElts) { @@ -3683,9 +3583,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // If the concatenated vector was padded, extract a subvector with the // correct number of elements. if (MaskNumElts != PaddedMaskNumElts) - Result = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, VT, Result, - DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Result = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Result, + DAG.getVectorIdxConstant(0, DL)); setValue(&I, Result); return; @@ -3729,10 +3628,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (StartIdx[Input] < 0) Src = DAG.getUNDEF(VT); else { - Src = DAG.getNode( - ISD::EXTRACT_SUBVECTOR, DL, VT, Src, - DAG.getConstant(StartIdx[Input], DL, - TLI.getVectorIdxTy(DAG.getDataLayout()))); + Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src, + DAG.getVectorIdxConstant(StartIdx[Input], DL)); } } @@ -3754,7 +3651,6 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); - EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); SmallVector<SDValue,8> Ops; for (int Idx : Mask) { SDValue Res; @@ -3765,8 +3661,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - EltVT, Src, DAG.getConstant(Idx, DL, IdxVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src, + DAG.getVectorIdxConstant(Idx, DL)); } Ops.push_back(Res); @@ -3882,13 +3778,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. - unsigned VectorWidth = I.getType()->isVectorTy() ? - I.getType()->getVectorNumElements() : 0; + bool IsVectorGEP = I.getType()->isVectorTy(); + ElementCount VectorElementCount = + IsVectorGEP ? cast<VectorType>(I.getType())->getElementCount() + : ElementCount(0, false); - if (VectorWidth && !N.getValueType().isVector()) { + if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); - EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth); - N = DAG.getSplatBuildVector(VT, dl, N); + EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); + if (VectorElementCount.Scalable) + N = DAG.getSplatVector(VT, dl, N); + else + N = DAG.getSplatBuildVector(VT, dl, N); } for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); @@ -3910,9 +3811,16 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offset, dl, N.getValueType()), Flags); } } else { + // IdxSize is the width of the arithmetic according to IR semantics. + // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth + // (and fix up the result later). unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS); MVT IdxTy = MVT::getIntegerVT(IdxSize); - APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType())); + TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType()); + // We intentionally mask away the high bits here; ElementSize may not + // fit in IdxTy. 
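The constant-offset path in visitGetElementPtr does all arithmetic in the pointer index width, deliberately truncating both the element size and the index to IdxSize bits. A standalone sketch of that arithmetic (illustrative, not the patch's code):

#include <cstdint>

// GEP constant-offset arithmetic modulo 2^IdxSize: the element size may not
// fit in the index type, so its high bits are masked away, and the index is
// sign-extended or truncated to the same width before multiplying.
uint64_t gepConstOffset(uint64_t ElementSize, int64_t Index, unsigned IdxSize) {
  uint64_t Mask = IdxSize >= 64 ? ~0ULL : ((1ULL << IdxSize) - 1);
  uint64_t Mul = ElementSize & Mask;                   // high bits dropped
  uint64_t Idx = static_cast<uint64_t>(Index) & Mask;  // sext-or-trunc
  return (Mul * Idx) & Mask;                           // wraparound is intended
}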
+ APInt ElementMul(IdxSize, ElementSize.getKnownMinSize()); + bool ElementScalable = ElementSize.isScalable(); // If this is a scalar constant or a splat vector of constants, // handle it quickly. @@ -3920,14 +3828,18 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (C && isa<VectorType>(C->getType())) C = C->getSplatValue(); - if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) { - if (CI->isZero()) - continue; - APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); + const auto *CI = dyn_cast_or_null<ConstantInt>(C); + if (CI && CI->isZero()) + continue; + if (CI && !ElementScalable) { + APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize); LLVMContext &Context = *DAG.getContext(); - SDValue OffsVal = VectorWidth ? - DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : - DAG.getConstant(Offs, dl, IdxTy); + SDValue OffsVal; + if (IsVectorGEP) + OffsVal = DAG.getConstant( + Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount)); + else + OffsVal = DAG.getConstant(Offs, dl, IdxTy); // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. @@ -3941,31 +3853,45 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { continue; } - // N = N + Idx * ElementSize; + // N = N + Idx * ElementMul; SDValue IdxN = getValue(Idx); - if (!IdxN.getValueType().isVector() && VectorWidth) { - EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth); - IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); + if (!IdxN.getValueType().isVector() && IsVectorGEP) { + EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), + VectorElementCount); + if (VectorElementCount.Scalable) + IdxN = DAG.getSplatVector(VT, dl, IdxN); + else + IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); } // If the index is smaller or larger than intptr_t, truncate or extend // it. IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); - // If this is a multiply by a power of two, turn it into a shl - // immediately. This is a very common case. - if (ElementSize != 1) { - if (ElementSize.isPowerOf2()) { - unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, dl, - N.getValueType(), IdxN, - DAG.getConstant(Amt, dl, IdxN.getValueType())); - } else { - SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl, - IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, dl, - N.getValueType(), IdxN, Scale); + if (ElementScalable) { + EVT VScaleTy = N.getValueType().getScalarType(); + SDValue VScale = DAG.getNode( + ISD::VSCALE, dl, VScaleTy, + DAG.getConstant(ElementMul.getZExtValue(), dl, VScaleTy)); + if (IsVectorGEP) + VScale = DAG.getSplatVector(N.getValueType(), dl, VScale); + IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale); + } else { + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. 
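The non-constant index path scales the index by the element size: a power-of-two size becomes a shift, anything else a multiply, and a scalable size picks up an extra vscale factor. A compact model of the three cases (illustrative only):

#include <cstdint>

uint64_t scaleIndex(uint64_t Idx, uint64_t ElementMul, bool Scalable,
                    uint64_t VScale) {
  if (Scalable)
    return Idx * (ElementMul * VScale);    // ISD::VSCALE feeding an ISD::MUL
  if (ElementMul != 0 && (ElementMul & (ElementMul - 1)) == 0) {
    unsigned Amt = 0;
    while ((1ULL << Amt) != ElementMul)
      ++Amt;                               // logBase2 of the element size
    return Idx << Amt;                     // power of two: ISD::SHL fast path
  }
  return Idx * ElementMul;                 // generic ISD::MUL path
}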
+ if (ElementMul != 1) { + if (ElementMul.isPowerOf2()) { + unsigned Amt = ElementMul.logBase2(); + IdxN = DAG.getNode(ISD::SHL, dl, + N.getValueType(), IdxN, + DAG.getConstant(Amt, dl, IdxN.getValueType())); + } else { + SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl, + IdxN.getValueType()); + IdxN = DAG.getNode(ISD::MUL, dl, + N.getValueType(), IdxN, Scale); + } } } @@ -3991,8 +3917,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); auto &DL = DAG.getDataLayout(); uint64_t TySize = DL.getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); + MaybeAlign Alignment = std::max(DL.getPrefTypeAlign(Ty), I.getAlign()); SDValue AllocSize = getValue(I.getArraySize()); @@ -4007,25 +3932,26 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. - unsigned StackAlign = - DAG.getSubtarget().getFrameLowering()->getStackAlignment(); - if (Align <= StackAlign) - Align = 0; + Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign(); + if (*Alignment <= StackAlign) + Alignment = None; + const uint64_t StackAlignMask = StackAlign.value() - 1U; // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. This doesn't overflow because we're computing // an address inside an alloca. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize, - DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags); + DAG.getConstant(StackAlignMask, dl, IntPtr), Flags); // Mask out the low bits for alignment purposes. - AllocSize = - DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, - DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr)); + AllocSize = DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize, + DAG.getConstant(~StackAlignMask, dl, IntPtr)); - SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)}; + SDValue Ops[] = { + getRoot(), AllocSize, + DAG.getConstant(Alignment ? 
Alignment->value() : 0, dl, IntPtr)}; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); setValue(&I, DSA); @@ -4057,13 +3983,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Ptr = getValue(SV); Type *Ty = I.getType(); - - bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); - bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); - bool isDereferenceable = - isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); - unsigned Alignment = I.getAlignment(); + Align Alignment = I.getAlign(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4076,6 +3996,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (NumValues == 0) return; + bool isVolatile = I.isVolatile(); + SDValue Root; bool ConstantMemory = false; if (isVolatile) @@ -4109,6 +4031,10 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); + + MachineMemOperand::Flags MMOFlags + = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and @@ -4128,16 +4054,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PtrVT, Ptr, DAG.getConstant(Offsets[i], dl, PtrVT), Flags); - auto MMOFlags = MachineMemOperand::MONone; - if (isVolatile) - MMOFlags |= MachineMemOperand::MOVolatile; - if (isNonTemporal) - MMOFlags |= MachineMemOperand::MONonTemporal; - if (isInvariant) - MMOFlags |= MachineMemOperand::MOInvariant; - if (isDereferenceable) - MMOFlags |= MachineMemOperand::MODereferenceable; - MMOFlags |= TLI.getMMOFlags(I); SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, MachinePointerInfo(SV, Offsets[i]), Alignment, @@ -4260,16 +4176,11 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SDValue Root = I.isVolatile() ? getRoot() : getMemoryRoot(); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); - unsigned Alignment = I.getAlignment(); + Align Alignment = I.getAlign(); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - auto MMOFlags = MachineMemOperand::MONone; - if (I.isVolatile()) - MMOFlags |= MachineMemOperand::MOVolatile; - if (I.hasMetadata(LLVMContext::MD_nontemporal)) - MMOFlags |= MachineMemOperand::MONonTemporal; - MMOFlags |= TLI.getMMOFlags(I); + auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); // An aggregate load cannot wrap around the address space, so offsets to its // parts don't wrap either. 
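The DYNAMIC_STACKALLOC sizing just above rounds the allocation up to the stack alignment with an add of StackAlign - 1 followed by a mask. The equivalent scalar computation (sketch, assuming StackAlign is a power of two):

#include <cstdint>

// Round a dynamic alloca size up to the stack alignment.  The add cannot
// wrap because the result is an address inside the alloca, which is why the
// DAG node carries the no-unsigned-wrap flag.
uint64_t roundUpToStackAlign(uint64_t AllocSize, uint64_t StackAlign) {
  uint64_t StackAlignMask = StackAlign - 1;
  return (AllocSize + StackAlignMask) & ~StackAlignMask;
}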
@@ -4304,25 +4215,25 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, bool IsCompressing) { SDLoc sdl = getCurSDLoc(); - auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, - unsigned& Alignment) { + auto getMaskedStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, + MaybeAlign &Alignment) { // llvm.masked.store.*(Src0, Ptr, alignment, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); - Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue(); + Alignment = cast<ConstantInt>(I.getArgOperand(2))->getMaybeAlignValue(); Mask = I.getArgOperand(3); }; - auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, - unsigned& Alignment) { + auto getCompressingStoreOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, + MaybeAlign &Alignment) { // llvm.masked.compressstore.*(Src0, Ptr, Mask) Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); - Alignment = 0; + Alignment = None; }; Value *PtrOperand, *MaskOperand, *Src0Operand; - unsigned Alignment; + MaybeAlign Alignment; if (IsCompressing) getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else @@ -4335,19 +4246,16 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, EVT VT = Src0.getValueType(); if (!Alignment) - Alignment = DAG.getEVTAlignment(VT); + Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - MachineMemOperand *MMO = - DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOStore, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), - Alignment, AAInfo); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4370,78 +4278,51 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // are looking for. If first operand of the GEP is a splat vector - we // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. -static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, +static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, ISD::MemIndexType &IndexType, SDValue &Scale, - SelectionDAGBuilder *SDB) { + SelectionDAGBuilder *SDB, const BasicBlock *CurBB) { SelectionDAG& DAG = SDB->DAG; - LLVMContext &Context = *DAG.getContext(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); - const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); - if (!GEP) - return false; - - const Value *GEPPtr = GEP->getPointerOperand(); - if (!GEPPtr->getType()->isVectorTy()) - Ptr = GEPPtr; - else if (!(Ptr = getSplatValue(GEPPtr))) - return false; - - unsigned FinalIndex = GEP->getNumOperands() - 1; - Value *IndexVal = GEP->getOperand(FinalIndex); - gep_type_iterator GTI = gep_type_begin(*GEP); - // Ensure all the other indices are 0. - for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) { - auto *C = dyn_cast<Constant>(GEP->getOperand(i)); + // Handle splat constant pointer. 
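getUniformBase, reworked in this hunk, decomposes a vector of pointers for gather/scatter into a scalar base, a vector index and a scale, so each lane addresses Base + Index[lane] * Scale; a splat-constant pointer degenerates to Index == 0 and Scale == 1. A small model of the resulting per-lane addressing (illustrative names, not the patch's code):

#include <cstdint>
#include <vector>

std::vector<uint64_t> lanePointers(uint64_t Base,
                                   const std::vector<int64_t> &Index,
                                   uint64_t Scale) {
  std::vector<uint64_t> Ptrs;
  Ptrs.reserve(Index.size());
  for (int64_t Idx : Index)
    Ptrs.push_back(Base + static_cast<uint64_t>(Idx) * Scale); // per-lane address
  return Ptrs;
}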
+ if (auto *C = dyn_cast<Constant>(Ptr)) { + C = C->getSplatValue(); if (!C) return false; - if (isa<VectorType>(C->getType())) - C = C->getSplatValue(); - auto *CI = dyn_cast_or_null<ConstantInt>(C); - if (!CI || !CI->isZero()) - return false; + + Base = SDB->getValue(C); + + unsigned NumElts = cast<FixedVectorType>(Ptr->getType())->getNumElements(); + EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts); + Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + return true; } - // The operands of the GEP may be defined in another basic block. - // In this case we'll not find nodes for the operands. - if (!SDB->findValue(Ptr)) + const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP || GEP->getParent() != CurBB) return false; - Constant *C = dyn_cast<Constant>(IndexVal); - if (!C && !SDB->findValue(IndexVal)) + + if (GEP->getNumOperands() != 2) return false; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = DAG.getDataLayout(); - StructType *STy = GTI.getStructTypeOrNull(); - - if (STy) { - const StructLayout *SL = DL.getStructLayout(STy); - if (isa<VectorType>(C->getType())) { - C = C->getSplatValue(); - // FIXME: If getSplatValue may return nullptr for a structure? - // If not, the following check can be removed. - if (!C) - return false; - } - auto *CI = cast<ConstantInt>(C); - Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - Index = DAG.getConstant(SL->getElementOffset(CI->getZExtValue()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - } else { - Scale = DAG.getTargetConstant( - DL.getTypeAllocSize(GEP->getResultElementType()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - Index = SDB->getValue(IndexVal); - } - Base = SDB->getValue(Ptr); - IndexType = ISD::SIGNED_SCALED; + const Value *BasePtr = GEP->getPointerOperand(); + const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1); - if (STy || !Index.getValueType().isVector()) { - unsigned GEPWidth = GEP->getType()->getVectorNumElements(); - EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); - Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); - } + // Make sure the base is scalar and the index is a vector. + if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy()) + return false; + + Base = SDB->getValue(BasePtr); + Index = SDB->getValue(IndexVal); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant( + DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; } @@ -4453,9 +4334,9 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Src0 = getValue(I.getArgOperand(0)); SDValue Mask = getValue(I.getArgOperand(3)); EVT VT = Src0.getValueType(); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); - if (!Alignment) - Alignment = DAG.getEVTAlignment(VT); + Align Alignment = cast<ConstantInt>(I.getArgOperand(2)) + ->getMaybeAlignValue() + .getValueOr(DAG.getEVTAlign(VT)); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); AAMDNodes AAInfo; @@ -4465,18 +4346,15 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Index; ISD::MemIndexType IndexType; SDValue Scale; - const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, - this); - - const Value *MemOpBasePtr = UniformBase ? 
BasePtr : nullptr; - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), - MachineMemOperand::MOStore, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), - Alignment, AAInfo); + bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + I.getParent()); + + unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOStore, + // TODO: Make MachineMemOperands aware of scalable + // vectors. + MemoryLocation::UnknownSize, Alignment, AAInfo); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); @@ -4493,25 +4371,25 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDLoc sdl = getCurSDLoc(); - auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, - unsigned& Alignment) { + auto getMaskedLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, + MaybeAlign &Alignment) { // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + Alignment = cast<ConstantInt>(I.getArgOperand(1))->getMaybeAlignValue(); Mask = I.getArgOperand(2); Src0 = I.getArgOperand(3); }; - auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0, - unsigned& Alignment) { + auto getExpandingLoadOps = [&](Value *&Ptr, Value *&Mask, Value *&Src0, + MaybeAlign &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = 0; + Alignment = None; Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; Value *PtrOperand, *MaskOperand, *Src0Operand; - unsigned Alignment; + MaybeAlign Alignment; if (IsExpanding) getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment); else @@ -4524,7 +4402,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { EVT VT = Src0.getValueType(); if (!Alignment) - Alignment = DAG.getEVTAlignment(VT); + Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4542,14 +4420,11 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); - MachineMemOperand *MMO = - DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(PtrOperand), - MachineMemOperand::MOLoad, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), - Alignment, AAInfo, Ranges); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
+ VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Offset, Mask, Src0, VT, MMO, @@ -4569,9 +4444,9 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); - if (!Alignment) - Alignment = DAG.getEVTAlignment(VT); + Align Alignment = cast<ConstantInt>(I.getArgOperand(1)) + ->getMaybeAlignValue() + .getValueOr(DAG.getEVTAlign(VT)); AAMDNodes AAInfo; I.getAAMetadata(AAInfo); @@ -4582,29 +4457,14 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Index; ISD::MemIndexType IndexType; SDValue Scale; - const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, - this); - bool ConstantMemory = false; - if (UniformBase && AA && - AA->pointsToConstantMemory( - MemoryLocation(BasePtr, - LocationSize::precise( - DAG.getDataLayout().getTypeStoreSize(I.getType())), - AAInfo))) { - // Do not serialize (non-volatile) loads of constant memory with anything. - Root = DAG.getEntryNode(); - ConstantMemory = true; - } - - MachineMemOperand *MMO = - DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), - MachineMemOperand::MOLoad, - // TODO: Make MachineMemOperands aware of scalable - // vectors. - VT.getStoreSize().getKnownMinSize(), - Alignment, AAInfo, Ranges); + bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + I.getParent()); + unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOLoad, + // TODO: Make MachineMemOperands aware of scalable + // vectors. 
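For reference on what the masked load lowered above computes per lane (active lanes read memory, inactive lanes keep the pass-through value), a scalar sketch assuming Mask and PassThru have the same number of lanes:

#include <cstddef>
#include <vector>

std::vector<int> maskedLoad(const std::vector<int> &Mem, std::size_t Offset,
                            const std::vector<bool> &Mask,
                            const std::vector<int> &PassThru) {
  std::vector<int> Result(PassThru);      // inactive lanes keep PassThru
  for (std::size_t I = 0; I < Mask.size(); ++I)
    if (Mask[I])
      Result[I] = Mem.at(Offset + I);     // active lanes load from memory
  return Result;
}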
+ MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); @@ -4616,9 +4476,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, Ops, MMO, IndexType); - SDValue OutChain = Gather.getValue(1); - if (!ConstantMemory) - PendingLoads.push_back(OutChain); + PendingLoads.push_back(Gather.getValue(1)); setValue(&I, Gather); } @@ -4633,19 +4491,14 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); - auto Alignment = DAG.getEVTAlignment(MemVT); - - auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), - Flags, MemVT.getStoreSize(), Alignment, - AAMDNodes(), nullptr, SSID, SuccessOrdering, - FailureOrdering); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), + DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, SuccessOrdering, + FailureOrdering); SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, @@ -4684,18 +4537,13 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); auto MemVT = getValue(I.getValOperand()).getSimpleValueType(); - auto Alignment = DAG.getEVTAlignment(MemVT); - - auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto Flags = TLI.getAtomicMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, - MemVT.getStoreSize(), Alignment, AAMDNodes(), - nullptr, SSID, Ordering); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), + DAG.getEVTAlign(MemVT), AAMDNodes(), nullptr, SSID, Ordering); SDValue L = DAG.getAtomic(NT, dl, MemVT, InChain, @@ -4735,24 +4583,11 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { I.getAlignment() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); - auto Flags = MachineMemOperand::MOLoad; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - if (I.hasMetadata(LLVMContext::MD_invariant_load)) - Flags |= MachineMemOperand::MOInvariant; - if (isDereferenceablePointer(I.getPointerOperand(), I.getType(), - DAG.getDataLayout())) - Flags |= MachineMemOperand::MODereferenceable; - - Flags |= TLI.getMMOFlags(I); - - MachineMemOperand *MMO = - DAG.getMachineFunction(). - getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), - Flags, MemVT.getStoreSize(), - I.getAlignment() ? 
I.getAlignment() : - DAG.getEVTAlignment(MemVT), - AAMDNodes(), nullptr, SSID, Order); + auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), + I.getAlign(), AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); @@ -4773,7 +4608,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { PendingLoads.push_back(OutChain); return; } - + SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, Ptr, MMO); @@ -4800,16 +4635,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { if (I.getAlignment() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - auto Flags = MachineMemOperand::MOStore; - if (I.isVolatile()) - Flags |= MachineMemOperand::MOVolatile; - Flags |= TLI.getMMOFlags(I); + auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags, - MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(), - nullptr, SSID, Ordering); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), + I.getAlign(), AAMDNodes(), nullptr, SSID, Ordering); SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) @@ -4899,10 +4730,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - Result = DAG.getMemIntrinsicNode( - Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo); + Result = + DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4926,6 +4757,15 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } else Result = lowerRangeToAssertZExt(DAG, I, Result); + MaybeAlign Alignment = I.getRetAlign(); + if (!Alignment) + Alignment = F->getAttributes().getRetAlignment(); + // Insert `assertalign` node if there's an alignment. + if (InsertAssertAlign && Alignment) { + Result = + DAG.getAssertAlign(getCurSDLoc(), Result, Alignment.valueOrOne()); + } + setValue(&I, Result); } } @@ -5465,7 +5305,8 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue Scale, SelectionDAG &DAG, const TargetLowering &TLI) { EVT VT = LHS.getValueType(); - bool Signed = Opcode == ISD::SDIVFIX; + bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT; + bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT; LLVMContext &Ctx = *DAG.getContext(); // If the type is legal but the operation isn't, this node might survive all @@ -5477,14 +5318,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, // by bumping the size by one bit. This will force it to Promote, enabling the // early expansion and avoiding the need to expand later. - // We don't have to do this if Scale is 0; that can always be expanded. 
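The saturating cases added to expandDivFix can overflow even at scale 0 (for example the most negative value divided by -1), which is why they now take the promotion path as well. A scalar model of llvm.sdiv.fix.sat on an 8-bit type (sketch, assuming RHS != 0 and a small Scale; the exact rounding rule is glossed over):

#include <cstdint>
#include <limits>

int8_t sdivFixSat8(int8_t LHS, int8_t RHS, unsigned Scale) {
  // Compute (LHS << Scale) / RHS in a wider type, then clamp to i8.
  int32_t Wide = (static_cast<int32_t>(LHS) << Scale) / RHS;
  if (Wide > std::numeric_limits<int8_t>::max())
    return std::numeric_limits<int8_t>::max();   // saturate to 127
  if (Wide < std::numeric_limits<int8_t>::min())
    return std::numeric_limits<int8_t>::min();   // saturate to -128
  return static_cast<int8_t>(Wide);
}

sdivFixSat8(-128, -1, 0) would be +128 without saturation and clamps to 127, which is the true-overflow case the comment above refers to.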
+ // We don't have to do this if Scale is 0; that can always be expanded, unless + // it's a saturating signed operation. Those can experience true integer + // division overflow, a case which we must avoid. // FIXME: We wouldn't have to do this (or any of the early // expansion/promotion) if it was possible to expand a libcall of an // illegal type during operation legalization. But it's not, so things // get a bit hacky. unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue(); - if (ScaleInt > 0 && + if ((ScaleInt > 0 || (Saturating && Signed)) && (TLI.isTypeLegal(VT) || (VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) { TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction( @@ -5506,8 +5349,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL, LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT); RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT); } - // TODO: Saturation. + EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout()); + // For saturating operations, we need to shift up the LHS to get the + // proper saturation width, and then shift down again afterwards. + if (Saturating) + LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS, + DAG.getConstant(1, DL, ShiftTy)); SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale); + if (Saturating) + Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res, + DAG.getConstant(1, DL, ShiftTy)); return DAG.getZExtOrTrunc(Res, DL, VT); } } @@ -5622,6 +5473,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + bool IsIndirect = false; Optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); @@ -5643,6 +5495,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (Reg) { Op = MachineOperand::CreateReg(Reg, false); + IsIndirect = IsDbgDeclare; } } @@ -5691,13 +5544,13 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, RegAndSize.first, Variable, *FragmentExpr)); } }; // Check if ValueMap has reg number. - DenseMap<const Value *, unsigned>::const_iterator + DenseMap<const Value *, Register>::const_iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); @@ -5709,6 +5562,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } Op = MachineOperand::CreateReg(VMI->second, false); + IsIndirect = IsDbgDeclare; } else if (ArgRegsAndSizes.size() > 1) { // This was split due to the calling convention, and no virtual register // mapping exists for the value. @@ -5722,28 +5576,9 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); - - // If the argument arrives in a stack slot, then what the IR thought was a - // normal Value is actually in memory, and we must add a deref to load it. 
- if (Op->isFI()) { - int FI = Op->getIndex(); - unsigned Size = DAG.getMachineFunction().getFrameInfo().getObjectSize(FI); - if (Expr->isImplicit()) { - SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size}; - Expr = DIExpression::prependOpcodes(Expr, Ops); - } else { - Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore); - } - } - - // If this location was specified with a dbg.declare, then it and its - // expression calculate the address of the variable. Append a deref to - // force it to be a memory location. - if (IsDbgDeclare) - Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); - + IsIndirect = (Op->isReg()) ? IsIndirect : true; FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, *Op, Variable, Expr)); return true; @@ -5787,6 +5622,10 @@ static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) { return ISD::SDIVFIX; case Intrinsic::udiv_fix: return ISD::UDIVFIX; + case Intrinsic::sdiv_fix_sat: + return ISD::SDIVFIXSAT; + case Intrinsic::udiv_fix_sat: + return ISD::UDIVFIXSAT; default: llvm_unreachable("Unhandled fixed point intrinsic"); } @@ -5798,7 +5637,24 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I, SDValue Callee = DAG.getExternalSymbol( FunctionName, DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); - LowerCallTo(&I, Callee, I.isTailCall()); + LowerCallTo(I, Callee, I.isTailCall()); +} + +/// Given a @llvm.call.preallocated.setup, return the corresponding +/// preallocated call. +static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) { + assert(cast<CallBase>(PreallocatedSetup) + ->getCalledFunction() + ->getIntrinsicID() == Intrinsic::call_preallocated_setup && + "expected call_preallocated_setup Value"); + for (auto *U : PreallocatedSetup->users()) { + auto *UseCall = cast<CallBase>(U); + const Function *Fn = UseCall->getCalledFunction(); + if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) { + return UseCall; + } + } + llvm_unreachable("expected corresponding call to preallocated setup/arg"); } /// Lower the call to the specified intrinsic function. @@ -5814,6 +5670,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // By default, turn this into a target intrinsic node. visitTargetIntrinsic(I, Intrinsic); return; + case Intrinsic::vscale: { + match(&I, m_VScale(DAG.getDataLayout())); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, + DAG.getVScale(getCurSDLoc(), VT, APInt(VT.getSizeInBits(), 1))); + return; + } case Intrinsic::vastart: visitVAStart(I); return; case Intrinsic::vaend: visitVAEnd(I); return; case Intrinsic::vacopy: visitVACopy(I); return; @@ -5835,6 +5698,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; + case Intrinsic::read_volatile_register: case Intrinsic::read_register: { Value *Reg = I.getArgOperand(0); SDValue Chain = getRoot(); @@ -5863,16 +5727,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memcpy defines 0 and 1 to both mean no alignment. 
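The memcpy lowering below now combines the source and destination alignments with commonAlignment and treats a missing alignment as 1. For two power-of-two alignments that is simply the minimum (sketch):

#include <algorithm>
#include <cstdint>

uint64_t commonCopyAlign(uint64_t DstAlign, uint64_t SrcAlign) {
  DstAlign = DstAlign ? DstAlign : 1;    // valueOrOne(): 0 and 1 mean unaligned
  SrcAlign = SrcAlign ? SrcAlign : 1;
  return std::min(DstAlign, SrcAlign);   // commonAlignment of powers of two
}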
- unsigned DstAlign = std::max<unsigned>(MCI.getDestAlignment(), 1); - unsigned SrcAlign = std::max<unsigned>(MCI.getSourceAlignment(), 1); - unsigned Align = MinAlign(DstAlign, SrcAlign); + Align DstAlign = MCI.getDestAlign().valueOrOne(); + Align SrcAlign = MCI.getSourceAlign().valueOrOne(); + Align Alignment = commonAlignment(DstAlign, SrcAlign); bool isVol = MCI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Align, isVol, - false, isTC, + SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, + /* AlwaysInline */ false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); + return; + } + case Intrinsic::memcpy_inline: { + const auto &MCI = cast<MemCpyInlineInst>(I); + SDValue Dst = getValue(I.getArgOperand(0)); + SDValue Src = getValue(I.getArgOperand(1)); + SDValue Size = getValue(I.getArgOperand(2)); + assert(isa<ConstantSDNode>(Size) && "memcpy_inline needs constant size"); + // @llvm.memcpy.inline defines 0 and 1 to both mean no alignment. + Align DstAlign = MCI.getDestAlign().valueOrOne(); + Align SrcAlign = MCI.getSourceAlign().valueOrOne(); + Align Alignment = commonAlignment(DstAlign, SrcAlign); + bool isVol = MCI.isVolatile(); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); + // FIXME: Support passing different dest/src alignments to the memcpy DAG + // node. + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MC); @@ -5884,12 +5769,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memset defines 0 and 1 to both mean no alignment. - unsigned Align = std::max<unsigned>(MSI.getDestAlignment(), 1); + Align Alignment = MSI.getDestAlign().valueOrOne(); bool isVol = MSI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Align, isVol, - isTC, MachinePointerInfo(I.getArgOperand(0))); + SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, + MachinePointerInfo(I.getArgOperand(0))); updateDAGForMaybeTailCall(MS); return; } @@ -5899,15 +5784,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); // @llvm.memmove defines 0 and 1 to both mean no alignment. 
- unsigned DstAlign = std::max<unsigned>(MMI.getDestAlignment(), 1); - unsigned SrcAlign = std::max<unsigned>(MMI.getSourceAlignment(), 1); - unsigned Align = MinAlign(DstAlign, SrcAlign); + Align DstAlign = MMI.getDestAlign().valueOrOne(); + Align SrcAlign = MMI.getSourceAlign().valueOrOne(); + Align Alignment = commonAlignment(DstAlign, SrcAlign); bool isVol = MMI.isVolatile(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memmove DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Align, isVol, + SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); updateDAGForMaybeTailCall(MM); @@ -5923,7 +5808,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned SrcAlign = MI.getSourceAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemcpy(getRoot(), sdl, Dst, DstAlign, Src, SrcAlign, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest()), @@ -5941,7 +5826,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned SrcAlign = MI.getSourceAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemmove(getRoot(), sdl, Dst, DstAlign, Src, SrcAlign, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest()), @@ -5958,13 +5843,37 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned DstAlign = MI.getDestAlignment(); Type *LengthTy = MI.getLength()->getType(); unsigned ElemSz = MI.getElementSizeInBytes(); - bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue MC = DAG.getAtomicMemset(getRoot(), sdl, Dst, DstAlign, Val, Length, LengthTy, ElemSz, isTC, MachinePointerInfo(MI.getRawDest())); updateDAGForMaybeTailCall(MC); return; } + case Intrinsic::call_preallocated_setup: { + const CallBase *PreallocatedCall = FindPreallocatedCall(&I); + SDValue SrcValue = DAG.getSrcValue(PreallocatedCall); + SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other, + getRoot(), SrcValue); + setValue(&I, Res); + DAG.setRoot(Res); + return; + } + case Intrinsic::call_preallocated_arg: { + const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0)); + SDValue SrcValue = DAG.getSrcValue(PreallocatedCall); + SDValue Ops[3]; + Ops[0] = getRoot(); + Ops[1] = SrcValue; + Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl, + MVT::i32); // arg index + SDValue Res = DAG.getNode( + ISD::PREALLOCATED_ARG, sdl, + DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return; + } case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { const auto &DI = cast<DbgVariableIntrinsic>(I); @@ -5972,12 +5881,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const 
CallInst &I, DIExpression *Expression = DI.getExpression(); dropDanglingDebugInfo(Variable, Expression); assert(Variable && "Missing variable"); - + LLVM_DEBUG(dbgs() << "SelectionDAG visiting debug intrinsic: " << DI + << "\n"); // Check if address has undef value. const Value *Address = DI.getVariableLocation(); if (!Address || isa<UndefValue>(Address) || (Address->use_empty() && !isa<Argument>(Address))) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI + << " (bad/undef/unused-arg address)\n"); return; } @@ -6006,6 +5917,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDDbgValue *SDV = DAG.getFrameIndexDbgValue( Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder); DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter); + } else { + LLVM_DEBUG(dbgs() << "Skipping " << DI + << " (variable info stashed in MF side table)\n"); } return; } @@ -6040,7 +5954,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // virtual register info from the FuncInfo.ValueMap. if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N)) { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI + << " (could not emit func-arg dbg_value)\n"); } } return; @@ -6192,6 +6107,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::rint: case Intrinsic::nearbyint: case Intrinsic::round: + case Intrinsic::roundeven: case Intrinsic::canonicalize: { unsigned Opcode; switch (Intrinsic) { @@ -6206,6 +6122,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; case Intrinsic::round: Opcode = ISD::FROUND; break; + case Intrinsic::roundeven: Opcode = ISD::FROUNDEVEN; break; case Intrinsic::canonicalize: Opcode = ISD::FCANONICALIZE; break; } @@ -6269,7 +6186,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return; -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC) \ case Intrinsic::INTRINSIC: #include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); @@ -6456,7 +6373,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::sdiv_fix: - case Intrinsic::udiv_fix: { + case Intrinsic::udiv_fix: + case Intrinsic::sdiv_fix_sat: + case Intrinsic::udiv_fix_sat: { SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); @@ -6466,9 +6385,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::stacksave: { SDValue Op = getRoot(); - Res = DAG.getNode( - ISD::STACKSAVE, sdl, - DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + Res = DAG.getNode(ISD::STACKSAVE, sdl, DAG.getVTList(VT, MVT::Other), Op); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; @@ -6479,7 +6397,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::get_dynamic_area_offset: { SDValue Op = getRoot(); - EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout()); EVT ResTy = TLI.getValueType(DAG.getDataLayout(), 
I.getType()); // Result type for @llvm.get.dynamic.area.offset should match PtrTy for // target. @@ -6493,13 +6411,13 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::stackguard: { - EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); MachineFunction &MF = DAG.getMachineFunction(); const Module &M = *MF.getFunction().getParent(); SDValue Chain = getRoot(); if (TLI.useLoadStackGuardNode()) { Res = getLoadStackGuard(DAG, sdl, Chain); } else { + EVT PtrTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); const Value *Global = TLI.getSDagStackGuard(M); unsigned Align = DL->getPrefTypeAlignment(Global->getType()); Res = DAG.getLoad(PtrTy, sdl, Chain, getValue(Global), @@ -6516,7 +6434,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); SDValue Src, Chain = getRoot(); if (TLI.useLoadStackGuardNode()) @@ -6528,6 +6445,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, int FI = FuncInfo.StaticAllocaMap[Slot]; MFI.setStackProtectorIndex(FI); + EVT PtrTy = TLI.getFrameIndexTy(DAG.getDataLayout()); SDValue FIN = DAG.getFrameIndex(FI, PtrTy); @@ -6606,7 +6524,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: - setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); + Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot()); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); return; case Intrinsic::expect: @@ -6678,12 +6598,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - SDValue Result = DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, - DAG.getVTList(MVT::Other), Ops, - EVT::getIntegerVT(*Context, 8), - MachinePointerInfo(I.getArgOperand(0)), - 0, /* align */ - Flags); + SDValue Result = DAG.getMemIntrinsicNode( + ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, + EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), + /* align */ None, Flags); // Chain the prefetch in parallell with any pending loads, to stay out of // the way of later optimizations. @@ -6750,10 +6668,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::experimental_patchpoint_void: case Intrinsic::experimental_patchpoint_i64: - visitPatchpoint(&I); + visitPatchpoint(I); return; case Intrinsic::experimental_gc_statepoint: - LowerStatepoint(ImmutableStatepoint(&I)); + LowerStatepoint(cast<GCStatepointInst>(I)); return; case Intrinsic::experimental_gc_result: visitGCResult(cast<GCResultInst>(I)); @@ -6794,7 +6712,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::localrecover: { // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) MachineFunction &MF = DAG.getMachineFunction(); - MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); // Get the symbol that defines the frame offset. 
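The flt.rounds change in this hunk gives FLT_ROUNDS_ a chain result so that reads of the rounding mode stay ordered with respect to code that changes it. A host-level analogue using the C floating-point environment (sketch; FE_UPWARD is only available where the platform supports that mode):

#include <cfenv>
#include <cstdio>

int main() {
  int Before = fegetround();    // must not be reordered past the set below
  fesetround(FE_UPWARD);
  int After = fegetround();     // must observe the updated mode
  std::printf("rounding mode: %d -> %d\n", Before, After);
  fesetround(Before);           // restore the original mode
  return 0;
}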
auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); @@ -6805,6 +6722,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, MF.getMMI().getContext().getOrCreateFrameAllocSymbol( GlobalValue::dropLLVMManglingEscape(Fn->getName()), IdxVal); + Value *FP = I.getArgOperand(1); + SDValue FPVal = getValue(FP); + EVT PtrVT = FPVal.getValueType(); + // Create a MCSymbol for the label to avoid any target lowering // that would make this PC relative. SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); @@ -6812,8 +6733,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); // Add the offset to the FP. - Value *FP = I.getArgOperand(1); - SDValue FPVal = getValue(FP); SDValue Add = DAG.getMemBasePlusOffset(FPVal, OffsetVal, sdl); setValue(&I, Add); @@ -6996,11 +6915,42 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Ptr = getValue(I.getOperand(0)); SDValue Const = getValue(I.getOperand(1)); - EVT DestVT = - EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + EVT PtrVT = Ptr.getValueType(); + setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), PtrVT, Ptr, + DAG.getZExtOrTrunc(Const, getCurSDLoc(), PtrVT))); + return; + } + case Intrinsic::get_active_lane_mask: { + auto DL = getCurSDLoc(); + SDValue Index = getValue(I.getOperand(0)); + SDValue BTC = getValue(I.getOperand(1)); + Type *ElementTy = I.getOperand(0)->getType(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + unsigned VecWidth = VT.getVectorNumElements(); + + SmallVector<SDValue, 16> OpsBTC; + SmallVector<SDValue, 16> OpsIndex; + SmallVector<SDValue, 16> OpsStepConstants; + for (unsigned i = 0; i < VecWidth; i++) { + OpsBTC.push_back(BTC); + OpsIndex.push_back(Index); + OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy))); + } - setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr, - DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); + EVT CCVT = MVT::i1; + CCVT = EVT::getVectorVT(I.getContext(), CCVT, VecWidth); + + auto VecTy = MVT::getVT(FixedVectorType::get(ElementTy, VecWidth)); + SDValue VectorIndex = DAG.getBuildVector(VecTy, DL, OpsIndex); + SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants); + SDValue VectorInduction = DAG.getNode( + ISD::UADDO, DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); + SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC); + SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0), + VectorBTC, ISD::CondCode::SETULE); + setValue(&I, DAG.getNode(ISD::AND, DL, CCVT, + DAG.getNOT(DL, VectorInduction.getValue(1), CCVT), + SetCC)); return; } } @@ -7032,14 +6982,67 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( Opers.push_back(getValue(FPI.getArgOperand(1))); } + auto pushOutChain = [this](SDValue Result, fp::ExceptionBehavior EB) { + assert(Result.getNode()->getNumValues() == 2); + + // Push node to the appropriate list so that future instructions can be + // chained up correctly. + SDValue OutChain = Result.getValue(1); + switch (EB) { + case fp::ExceptionBehavior::ebIgnore: + // The only reason why ebIgnore nodes still need to be chained is that + // they might depend on the current rounding mode, and therefore must + // not be moved across instruction that may change that mode. + LLVM_FALLTHROUGH; + case fp::ExceptionBehavior::ebMayTrap: + // These must not be moved across calls or instructions that may change + // floating-point exception masks. 
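The get.active.lane.mask expansion earlier in this hunk marks lane I active when Index + I is less than or equal to the backedge-taken count, using UADDO so that an unsigned wrap of Index + I deactivates the lane. A scalar model of that computation (illustrative only):

#include <cstdint>
#include <vector>

std::vector<bool> activeLaneMask(uint32_t Index, uint32_t BTC,
                                 unsigned NumLanes) {
  std::vector<bool> Mask(NumLanes);
  for (unsigned I = 0; I < NumLanes; ++I) {
    uint32_t Sum = Index + I;
    bool Overflow = Sum < Index;         // the UADDO carry bit
    Mask[I] = !Overflow && Sum <= BTC;   // SETULE gated by the negated carry
  }
  return Mask;
}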
+ PendingConstrainedFP.push_back(OutChain); + break; + case fp::ExceptionBehavior::ebStrict: + // These must not be moved across calls or instructions that may change + // floating-point exception masks or read floating-point exception flags. + // In addition, they cannot be optimized out even if unused. + PendingConstrainedFPStrict.push_back(OutChain); + break; + } + }; + + SDVTList VTs = DAG.getVTList(ValueVTs); + fp::ExceptionBehavior EB = FPI.getExceptionBehavior().getValue(); + + SDNodeFlags Flags; + if (EB == fp::ExceptionBehavior::ebIgnore) + Flags.setNoFPExcept(true); + + if (auto *FPOp = dyn_cast<FPMathOperator>(&FPI)) + Flags.copyFMF(*FPOp); + unsigned Opcode; switch (FPI.getIntrinsicID()) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. -#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ +#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ case Intrinsic::INTRINSIC: \ Opcode = ISD::STRICT_##DAGN; \ break; #include "llvm/IR/ConstrainedOps.def" + case Intrinsic::experimental_constrained_fmuladd: { + Opcode = ISD::STRICT_FMA; + // Break fmuladd into fmul and fadd. + if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict || + !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), + ValueVTs[0])) { + Opers.pop_back(); + SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); + pushOutChain(Mul, EB); + Opcode = ISD::STRICT_FADD; + Opers.clear(); + Opers.push_back(Mul.getValue(1)); + Opers.push_back(Mul.getValue(0)); + Opers.push_back(getValue(FPI.getArgOperand(2))); + } + break; + } } // A few strict DAG nodes carry additional operands that are not @@ -7058,32 +7061,8 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } } - SDVTList VTs = DAG.getVTList(ValueVTs); - SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers); - - assert(Result.getNode()->getNumValues() == 2); - - // Push node to the appropriate list so that future instructions can be - // chained up correctly. - SDValue OutChain = Result.getValue(1); - switch (FPI.getExceptionBehavior().getValue()) { - case fp::ExceptionBehavior::ebIgnore: - // The only reason why ebIgnore nodes still need to be chained is that - // they might depend on the current rounding mode, and therefore must - // not be moved across instruction that may change that mode. - LLVM_FALLTHROUGH; - case fp::ExceptionBehavior::ebMayTrap: - // These must not be moved across calls or instructions that may change - // floating-point exception masks. - PendingConstrainedFP.push_back(OutChain); - break; - case fp::ExceptionBehavior::ebStrict: - // These must not be moved across calls or instructions that may change - // floating-point exception masks or read floating-point exception flags. - // In addition, they cannot be optimized out even if unused. - PendingConstrainedFPStrict.push_back(OutChain); - break; - } + SDValue Result = DAG.getNode(Opcode, sdl, VTs, Opers, Flags); + pushOutChain(Result, EB); SDValue FPResult = Result.getValue(0); setValue(&FPI, FPResult); @@ -7150,10 +7129,9 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, // There is a platform (e.g. wasm) that uses funclet style IR but does not // actually use outlined funclets and their LSDA info style. 
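
The @llvm.get.active.lane.mask expansion added a couple of hunks above splats the starting index, adds a 0..VF-1 step vector with UADDO, and keeps a lane only if that add did not wrap and the result is ULE the BTC operand (the backedge-taken count). A minimal standalone sketch of that per-lane rule, with plain integers in place of SelectionDAG nodes (activeLaneMask is just an illustrative name):

#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<bool> activeLaneMask(uint64_t Index, uint64_t BTC,
                                        unsigned VF) {
  std::vector<bool> Mask(VF);
  for (unsigned i = 0; i < VF; ++i) {
    uint64_t Sum = Index + i;          // UADDO result value
    bool Overflow = Sum < Index;       // UADDO carry bit
    Mask[i] = !Overflow && Sum <= BTC; // AND(NOT(carry), SETULE)
  }
  return Mask;
}

int main() {
  // Loop with 10 iterations (BTC = 9), 4 lanes, starting at index 8:
  // only lanes 0 and 1 (indices 8 and 9) are still active.
  for (bool B : activeLaneMask(8, 9, 4))
    std::printf("%d ", B); // prints: 1 1 0 0
  std::printf("\n");
  return 0;
}
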
if (MF.hasEHFunclets() && isFuncletEHPersonality(Pers)) { - assert(CLI.CS); + assert(CLI.CB); WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); - EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS.getInstruction()), - BeginLabel, EndLabel); + EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CB), BeginLabel, EndLabel); } else if (!isScopedEHPersonality(Pers)) { MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); } @@ -7162,15 +7140,15 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, return Result; } -void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, +void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, bool isTailCall, const BasicBlock *EHPadBB) { auto &DL = DAG.getDataLayout(); - FunctionType *FTy = CS.getFunctionType(); - Type *RetTy = CS.getType(); + FunctionType *FTy = CB.getFunctionType(); + Type *RetTy = CB.getType(); TargetLowering::ArgListTy Args; - Args.reserve(CS.arg_size()); + Args.reserve(CB.arg_size()); const Value *SwiftErrorVal = nullptr; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -7178,7 +7156,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (isTailCall) { // Avoid emitting tail calls in functions with the disable-tail-calls // attribute. - auto *Caller = CS.getInstruction()->getParent()->getParent(); + auto *Caller = CB.getParent()->getParent(); if (Caller->getFnAttribute("disable-tail-calls").getValueAsString() == "true") isTailCall = false; @@ -7191,10 +7169,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, isTailCall = false; } - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); - i != e; ++i) { + for (auto I = CB.arg_begin(), E = CB.arg_end(); I != E; ++I) { TargetLowering::ArgListEntry Entry; - const Value *V = *i; + const Value *V = *I; // Skip empty types if (V->getType()->isEmptyTy()) @@ -7203,16 +7180,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - Entry.setAttributes(&CS, i - CS.arg_begin()); + Entry.setAttributes(&CB, I - CB.arg_begin()); // Use swifterror virtual register as input to the call. if (Entry.IsSwiftError && TLI.supportSwiftError()) { SwiftErrorVal = V; // We find the virtual register for the actual swifterror argument. // Instead of using the Value, we use the virtual register instead. - Entry.Node = DAG.getRegister( - SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V), - EVT(TLI.getPointerTy(DL))); + Entry.Node = + DAG.getRegister(SwiftError.getOrCreateVRegUseAt(&CB, FuncInfo.MBB, V), + EVT(TLI.getPointerTy(DL))); } Args.push_back(Entry); @@ -7225,7 +7202,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // If call site has a cfguardtarget operand bundle, create and add an // additional ArgListEntry. - if (auto Bundle = CS.getOperandBundle(LLVMContext::OB_cfguardtarget)) { + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_cfguardtarget)) { TargetLowering::ArgListEntry Entry; Value *V = Bundle->Inputs[0]; SDValue ArgNode = getValue(V); @@ -7237,7 +7214,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. 
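
One hunk back, experimental_constrained_fmuladd is lowered to STRICT_FMA unless fused operations are disallowed or not profitable, in which case it is split into STRICT_FMUL followed by STRICT_FADD with the chains linked through pushOutChain. The choice is observable: a fused multiply-add rounds once, the split form rounds twice. A small ordinary (non-strict) C++ demonstration of that difference, not part of the patch:

#include <cmath>
#include <cstdio>

int main() {
  // The intermediate product a*b is not exactly representable in double,
  // so the separately rounded a*b + c differs from std::fma(a, b, c).
  double a = 1.0 + 0x1p-27;
  double b = 1.0 + 0x1p-27;
  double c = -(1.0 + 0x1p-26);
  double fused = std::fma(a, b, c); // rounds once  -> 0x1p-54
  double split = a * b + c;         // rounds twice -> 0x0p+0
  std::printf("fused = %a\nsplit = %a\n", fused, split);
  return 0;
}
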
- if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) + if (isTailCall && !isInTailCallPosition(CB, DAG.getTarget())) isTailCall = false; // Disable tail calls if there is an swifterror argument. Targets have not @@ -7248,15 +7225,16 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setCallee(RetTy, FTy, Callee, std::move(Args), CB) .setTailCall(isTailCall) - .setConvergent(CS.isConvergent()); + .setConvergent(CB.isConvergent()) + .setIsPreallocated( + CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { - const Instruction *Inst = CS.getInstruction(); - Result.first = lowerRangeToAssertZExt(DAG, *Inst, Result.first); - setValue(Inst, Result.first); + Result.first = lowerRangeToAssertZExt(DAG, CB, Result.first); + setValue(&CB, Result.first); } // The last element of CLI.InVals has the SDValue for swifterror return. @@ -7265,8 +7243,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - Register VReg = SwiftError.getOrCreateVRegDefAt( - CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); + Register VReg = + SwiftError.getOrCreateVRegDefAt(&CB, FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); DAG.setRoot(CopyNode); } @@ -7281,7 +7259,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, Type *LoadTy = Type::getIntNTy(PtrVal->getContext(), LoadVT.getScalarSizeInBits()); if (LoadVT.isVector()) - LoadTy = VectorType::get(LoadTy, LoadVT.getVectorNumElements()); + LoadTy = FixedVectorType::get(LoadTy, LoadVT.getVectorNumElements()); LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), PointerType::getUnqual(LoadTy)); @@ -7455,11 +7433,10 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { SDValue Src = getValue(I.getArgOperand(1)); SDValue Size = getValue(I.getArgOperand(2)); - unsigned DstAlign = DAG.InferPtrAlignment(Dst); - unsigned SrcAlign = DAG.InferPtrAlignment(Src); - unsigned Align = std::min(DstAlign, SrcAlign); - if (Align == 0) // Alignment of one or both could not be inferred. - Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved. + Align DstAlign = DAG.InferPtrAlign(Dst).valueOrOne(); + Align SrcAlign = DAG.InferPtrAlign(Src).valueOrOne(); + // DAG::getMemcpy needs Alignment to be defined. + Align Alignment = std::min(DstAlign, SrcAlign); bool isVol = false; SDLoc sdl = getCurSDLoc(); @@ -7468,8 +7445,8 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // because the return pointer needs to be adjusted by the size of // the copied memory. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Align, isVol, - false, /*isTailCall=*/false, + SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false, + /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1))); assert(MC.getNode() != nullptr && @@ -7611,8 +7588,8 @@ bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, void SelectionDAGBuilder::visitCall(const CallInst &I) { // Handle inline assembly differently. 
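
In visitMemPCpyCall above, the unsigned alignments (where 0 meant "could not be inferred") give way to InferPtrAlign plus valueOrOne(), so the minimum of the two alignments can never be the reserved value 0. A standalone analogue of that arithmetic with std::optional standing in for LLVM's MaybeAlign; this is only a sketch of the numeric behaviour, not the DAG code:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <optional>

// "Unknown" is the empty optional rather than 0, so taking the minimum
// no longer needs a special case for the reserved value.
static uint64_t valueOrOne(std::optional<uint64_t> A) { return A ? *A : 1; }

int main() {
  std::optional<uint64_t> DstAlign = 16;  // inferred
  std::optional<uint64_t> SrcAlign = {};  // could not be inferred
  uint64_t Alignment = std::min(valueOrOne(DstAlign), valueOrOne(SrcAlign));
  std::printf("memcpy alignment = %llu\n", (unsigned long long)Alignment); // 1
  return 0;
}
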
- if (isa<InlineAsm>(I.getCalledValue())) { - visitInlineAsm(&I); + if (I.isInlineAsm()) { + visitInlineAsm(I); return; } @@ -7778,12 +7755,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't // have to do anything here to lower funclet bundles. // CFGuardTarget bundles are lowered in LowerCallTo. - assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt, - LLVMContext::OB_funclet, - LLVMContext::OB_cfguardtarget}) && + assert(!I.hasOperandBundlesOtherThan( + {LLVMContext::OB_deopt, LLVMContext::OB_funclet, + LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) && "Cannot lower calls with arbitrary operand bundles!"); - SDValue Callee = getValue(I.getCalledValue()); + SDValue Callee = getValue(I.getCalledOperand()); if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) LowerCallSiteWithDeoptBundle(&I, Callee, nullptr); @@ -7791,7 +7768,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { // Check if we can potentially perform a tail call. More detailed checking // is be done within LowerCallTo, after more information about the call is // known. - LowerCallTo(&I, Callee, I.isTailCall()); + LowerCallTo(I, Callee, I.isTailCall()); } namespace { @@ -7834,7 +7811,7 @@ public: if (!CallOperandVal) return MVT::Other; if (isa<BasicBlock>(CallOperandVal)) - return TLI.getPointerTy(DL); + return TLI.getProgramPointerTy(DL); llvm::Type *OpTy = CallOperandVal->getType(); @@ -7874,7 +7851,6 @@ public: } }; -using SDISelAsmOperandInfoVector = SmallVector<SDISelAsmOperandInfo, 16>; } // end anonymous namespace @@ -7936,9 +7912,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, Type *Ty = OpVal->getType(); auto &DL = DAG.getDataLayout(); uint64_t TySize = DL.getTypeAllocSize(Ty); - unsigned Align = DL.getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + int SSFI = MF.getFrameInfo().CreateStackObject( + TySize, DL.getPrefTypeAlign(Ty), false); SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL)); Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(MF, SSFI), @@ -8083,13 +8059,13 @@ class ExtraFlags { unsigned Flags = 0; public: - explicit ExtraFlags(ImmutableCallSite CS) { - const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + explicit ExtraFlags(const CallBase &Call) { + const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); if (IA->hasSideEffects()) Flags |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) Flags |= InlineAsm::Extra_IsAlignStack; - if (CS.isConvergent()) + if (Call.isConvergent()) Flags |= InlineAsm::Extra_IsConvergent; Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect; } @@ -8116,23 +8092,24 @@ public: } // end anonymous namespace /// visitInlineAsm - Handle a call to an InlineAsm object. -void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { - const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); +void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call) { + const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); /// ConstraintOperands - Information about all of the constraints. 
- SDISelAsmOperandInfoVector ConstraintOperands; + SmallVector<SDISelAsmOperandInfo, 16> ConstraintOperands; const TargetLowering &TLI = DAG.getTargetLoweringInfo(); TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( - DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); + DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), Call); // First Pass: Calculate HasSideEffects and ExtraFlags (AlignStack, // AsmDialect, MayLoad, MayStore). bool HasSideEffect = IA->hasSideEffects(); - ExtraFlags ExtraInfo(CS); + ExtraFlags ExtraInfo(Call); unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. + unsigned NumMatchingOps = 0; for (auto &T : TargetConstraints) { ConstraintOperands.push_back(SDISelAsmOperandInfo(T)); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); @@ -8140,14 +8117,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the value type for each operand. if (OpInfo.Type == InlineAsm::isInput || (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) { - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); // Process the call argument. BasicBlocks are labels, currently appearing // only in asm's. - const Instruction *I = CS.getInstruction(); - if (isa<CallBrInst>(I) && - (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() - - cast<CallBrInst>(I)->getNumIndirectDests())) { + if (isa<CallBrInst>(Call) && + ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() - + cast<CallBrInst>(&Call)->getNumIndirectDests() - + NumMatchingOps) && + (NumMatchingOps == 0 || + ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() - + NumMatchingOps))) { const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal); EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true); OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); @@ -8164,20 +8144,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) { // The return value of the call is this value. As such, there is no // corresponding argument. - assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); - if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(Call.getType())) { OpInfo.ConstraintVT = TLI.getSimpleValueType( DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); OpInfo.ConstraintVT = - TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); + TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType()); } ++ResNo; } else { OpInfo.ConstraintVT = MVT::Other; } + if (OpInfo.hasMatchingInput()) + ++NumMatchingOps; + if (!HasSideEffect) HasSideEffect = OpInfo.hasMemory(TLI); @@ -8191,9 +8174,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand)) // We've delayed emitting a diagnostic like the "n" constraint because // inlining could cause an integer showing up. 
- return emitInlineAsmError( - CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an " - "integer constant expression"); + return emitInlineAsmError(Call, "constraint '" + Twine(T.ConstraintCode) + + "' expects an integer constant " + "expression"); ExtraInfo.update(T); } @@ -8203,7 +8186,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // memory and is nonvolatile. SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot(); - bool IsCallBr = isa<CallBrInst>(CS.getInstruction()); + bool IsCallBr = isa<CallBrInst>(Call); if (IsCallBr) { // If this is a callbr we need to flush pending exports since inlineasm_br // is a terminator. We need to do this before nodes are glued to @@ -8253,12 +8236,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector<SDValue> AsmNodeOperands; AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( - IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); + IA->getAsmString().c_str(), TLI.getProgramPointerTy(DAG.getDataLayout()))); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we // pass in the third operand as this (potentially null) inline asm MDNode. - const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); + const MDNode *SrcLoc = Call.getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore @@ -8276,6 +8259,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { : OpInfo; GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); + auto DetectWriteToReservedRegister = [&]() { + const MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + for (unsigned Reg : OpInfo.AssignedRegs.Regs) { + if (Register::isPhysicalRegister(Reg) && + TRI.isInlineAsmReadOnlyReg(MF, Reg)) { + const char *RegName = TRI.getName(Reg); + emitInlineAsmError(Call, "write to reserved register '" + + Twine(RegName) + "'"); + return true; + } + } + return false; + }; + switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory) { @@ -8296,11 +8294,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Immediate/C_Other). Find a register that we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( - CS, "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + Call, "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } + if (DetectWriteToReservedRegister()) + return; + // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands( @@ -8325,9 +8326,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c - emitInlineAsmError(CS, "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); + emitInlineAsmError(Call, "inline asm not supported yet: " + "don't know how to handle tied " + "indirect register inputs"); return; } @@ -8341,8 +8342,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(RegInfo.createVirtualRegister(RC)); } else { - emitInlineAsmError(CS, "inline asm error: This value type register " - "class is not natively supported!"); + emitInlineAsmError(Call, + "inline asm error: This value type register " + "class is not natively supported!"); return; } @@ -8350,8 +8352,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, - CS.getInstruction()); + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, &Call); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), dl, DAG, AsmNodeOperands); @@ -8385,13 +8386,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (Ops.empty()) { if (OpInfo.ConstraintType == TargetLowering::C_Immediate) if (isa<ConstantSDNode>(InOperandVal)) { - emitInlineAsmError(CS, "value out of range for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(Call, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } - emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(Call, + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } @@ -8432,23 +8434,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // TODO: Support this. if (OpInfo.isIndirect) { emitInlineAsmError( - CS, "Don't know how to handle indirect register inputs yet " - "for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + Call, "Don't know how to handle indirect register inputs yet " + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } // Copy the input into the appropriate registers. 
if (OpInfo.AssignedRegs.Regs.empty()) { - emitInlineAsmError(CS, "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); + emitInlineAsmError(Call, + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); return; } + if (DetectWriteToReservedRegister()) + return; + SDLoc dl = getCurSDLoc(); - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, - Chain, &Flag, CS.getInstruction()); + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Flag, + &Call); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, dl, DAG, AsmNodeOperands); @@ -8480,12 +8486,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SmallVector<SDValue, 1> ResultValues; SmallVector<SDValue, 8> OutChains; - llvm::Type *CSResultType = CS.getType(); + llvm::Type *CallResultType = Call.getType(); ArrayRef<Type *> ResultTypes; - if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) + if (StructType *StructResult = dyn_cast<StructType>(CallResultType)) ResultTypes = StructResult->elements(); - else if (!CSResultType->isVoidTy()) - ResultTypes = makeArrayRef(CSResultType); + else if (!CallResultType->isVoidTy()) + ResultTypes = makeArrayRef(CallResultType); auto CurResultType = ResultTypes.begin(); auto handleRegAssign = [&](SDValue V) { @@ -8529,8 +8535,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.ConstraintType) { case TargetLowering::C_Register: case TargetLowering::C_RegisterClass: - Val = OpInfo.AssignedRegs.getCopyFromRegs( - DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); + Val = OpInfo.AssignedRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), + Chain, &Flag, &Call); break; case TargetLowering::C_Immediate: case TargetLowering::C_Other: @@ -8552,7 +8558,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OutChains.push_back(Store); } else { // generate CopyFromRegs to associated registers. - assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); if (Val.getOpcode() == ISD::MERGE_VALUES) { for (const SDValue &V : Val->op_values()) handleRegAssign(V); @@ -8571,7 +8577,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(ResultVTs), ResultValues); - setValue(CS.getInstruction(), V); + setValue(&Call, V); } // Collect store chains. 
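
The DetectWriteToReservedRegister lambda introduced earlier in this function rejects inline asm whenever a constraint is assigned a physical register the target reports as read-only, and it is invoked on both the output and the register-input paths. A toy standalone version of that membership check; the register names and the reserved set below are invented purely for illustration:

#include <cstdio>
#include <set>
#include <string>
#include <vector>

// Returns the first assigned register that is in the reserved set,
// or an empty string if the assignment is acceptable.
static std::string findReservedWrite(const std::vector<std::string> &Assigned,
                                     const std::set<std::string> &Reserved) {
  for (const std::string &Reg : Assigned)
    if (Reserved.count(Reg))
      return Reg;
  return "";
}

int main() {
  std::set<std::string> Reserved = {"sp", "x18"}; // hypothetical target
  std::string Bad = findReservedWrite({"x0", "x18"}, Reserved);
  if (!Bad.empty())
    std::printf("error: write to reserved register '%s'\n", Bad.c_str());
  return 0;
}
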
@@ -8583,15 +8589,15 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { DAG.setRoot(Chain); } -void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, +void SelectionDAGBuilder::emitInlineAsmError(const CallBase &Call, const Twine &Message) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), Message); + Ctx.emitError(&Call, Message); // Make sure we leave the DAG in a valid state const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 1> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), Call.getType(), ValueVTs); if (ValueVTs.empty()) return; @@ -8600,7 +8606,7 @@ void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS, for (unsigned i = 0, e = ValueVTs.size(); i != e; ++i) Ops.push_back(DAG.getUNDEF(ValueVTs[i])); - setValue(CS.getInstruction(), DAG.getMergeValues(Ops, getCurSDLoc())); + setValue(&Call, DAG.getMergeValues(Ops, getCurSDLoc())); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { @@ -8616,7 +8622,7 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { SDValue V = DAG.getVAArg( TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), - DL.getABITypeAlignment(I.getType())); + DL.getABITypeAlign(I.getType()).value()); DAG.setRoot(V.getValue(1)); if (I.getType()->isPointerTy()) @@ -8711,7 +8717,9 @@ void SelectionDAGBuilder::populateCallLoweringInfo( .setChain(getRoot()) .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args)) .setDiscardResult(Call->use_empty()) - .setIsPatchPoint(IsPatchPoint); + .setIsPatchPoint(IsPatchPoint) + .setIsPreallocated( + Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0); } /// Add a stack map intrinsic call's live variable operands to a stackmap @@ -8731,11 +8739,11 @@ void SelectionDAGBuilder::populateCallLoweringInfo( /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were /// only available in a register, then the runtime would need to trap when /// execution reaches the StackMap in order to read the alloca's location. -static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, +static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { - for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { - SDValue OpVal = Builder.getValue(CS.getArgument(i)); + for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) { + SDValue OpVal = Builder.getValue(Call.getArgOperand(i)); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { Ops.push_back( Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); @@ -8761,7 +8769,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { SmallVector<SDValue, 32> Ops; SDLoc DL = getCurSDLoc(); - Callee = getValue(CI.getCalledValue()); + Callee = getValue(CI.getCalledOperand()); NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguments @@ -8787,7 +8795,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { MVT::i32)); // Push live variables for the stack map. - addStackMapLiveVars(&CI, 2, DL, Ops, *this); + addStackMapLiveVars(CI, 2, DL, Ops, *this); // We are not pushing any register mask info here on the operands list, // because the stackmap doesn't clobber anything. 
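
In addStackMapLiveVars above, a live value that is a plain constant is recorded as a StackMaps::ConstantOp marker followed by the value itself as an i64 (the remaining operand kinds are handled further down in the same function, outside this excerpt). A toy sketch of that marker-plus-payload shape; the marker value here is made up and the vector of integers stands in for the real SDValue operand list:

#include <cstdint>
#include <cstdio>
#include <vector>

enum : uint64_t { ToyConstantOpMarker = ~0ULL }; // invented marker value

static void pushLiveConstant(std::vector<uint64_t> &Ops, uint64_t C) {
  Ops.push_back(ToyConstantOpMarker); // "the next operand is a literal"
  Ops.push_back(C);                   // the constant itself, as i64
}

int main() {
  std::vector<uint64_t> Ops;
  pushLiveConstant(Ops, 42);
  std::printf("emitted %zu operands for one live constant\n", Ops.size());
  return 0;
}
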
@@ -8814,7 +8822,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { } /// Lower llvm.experimental.patchpoint directly to its target opcode. -void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, +void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB) { // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, // i32 <numBytes>, @@ -8823,11 +8831,11 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // [Args...], // [live variables...]) - CallingConv::ID CC = CS.getCallingConv(); + CallingConv::ID CC = CB.getCallingConv(); bool IsAnyRegCC = CC == CallingConv::AnyReg; - bool HasDef = !CS->getType()->isVoidTy(); + bool HasDef = !CB.getType()->isVoidTy(); SDLoc dl = getCurSDLoc(); - SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos)); + SDValue Callee = getValue(CB.getArgOperand(PatchPointOpers::TargetPos)); // Handle immediate and symbolic callees. if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) @@ -8839,23 +8847,23 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, SymbolicCallee->getValueType(0)); // Get the real number of arguments participating in the call <numArgs> - SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); + SDValue NArgVal = getValue(CB.getArgOperand(PatchPointOpers::NArgPos)); unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> // Intrinsics include all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; - assert(CS.arg_size() >= NumMetaOpers + NumArgs && + assert(CB.arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; Type *ReturnTy = - IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CB.getType(); TargetLowering::CallLoweringInfo CLI(DAG); - populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()), - NumMetaOpers, NumCallArgs, Callee, ReturnTy, true); + populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee, + ReturnTy, true); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); @@ -8873,10 +8881,10 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, SmallVector<SDValue, 8> Ops; // Add the <id> and <numBytes> constants. - SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); + SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); - SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); + SDValue NBytesVal = getValue(CB.getArgOperand(PatchPointOpers::NBytesPos)); Ops.push_back(DAG.getTargetConstant( cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, MVT::i32)); @@ -8898,14 +8906,14 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // place these in any free register. if (IsAnyRegCC) for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) - Ops.push_back(getValue(CS.getArgument(i))); + Ops.push_back(getValue(CB.getArgOperand(i))); // Push the arguments from the call instruction up to the register mask. SDNode::op_iterator e = HasGlue ? 
Call->op_end()-2 : Call->op_end()-1; Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. - addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); + addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this); // Push the register mask info. if (HasGlue) @@ -8926,7 +8934,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Create the return types based on the intrinsic definition const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SmallVector<EVT, 3> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + ComputeValueVTs(TLI, DAG.getDataLayout(), CB.getType(), ValueVTs); assert(ValueVTs.size() == 1 && "Expected only one return value type."); // There is always a chain and a glue type at the end @@ -8943,9 +8951,9 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, // Update the NodeMap. if (HasDef) { if (IsAnyRegCC) - setValue(CS.getInstruction(), SDValue(MN, 0)); + setValue(&CB, SDValue(MN, 0)); else - setValue(CS.getInstruction(), Result.first); + setValue(&CB, Result.first); } // Fixup the consumers of the intrinsic. The chain and glue may be used in the @@ -9094,9 +9102,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // assert(!CS.hasInAllocaArgument() && // "sret demotion is incompatible with inalloca"); uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); - unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); + Align Alignment = DL.getPrefTypeAlign(CLI.RetTy); MachineFunction &MF = CLI.DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false); + DemoteStackIdx = + MF.getFrameInfo().CreateStackObject(TySize, Alignment, false); Type *StackSlotPtrType = PointerType::get(CLI.RetTy, DL.getAllocaAddrSpace()); @@ -9114,7 +9123,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.IsSwiftSelf = false; Entry.IsSwiftError = false; Entry.IsCFGuardTarget = false; - Entry.Alignment = Align; + Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); @@ -9230,6 +9239,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setCFGuardTarget(); if (Args[i].IsByVal) Flags.setByVal(); + if (Args[i].IsPreallocated) { + Flags.setPreallocated(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // preallocated. This way we can know how many bytes we should've + // allocated and how many bytes a callee cleanup function will pop. If + // we port preallocated to more targets, we'll have to add custom + // preallocated handling in the various CC lowering callbacks. + Flags.setByVal(); + } if (Args[i].IsInAlloca) { Flags.setInAlloca(); // Set the byval flag for CCAssignFn callbacks that don't know about @@ -9239,7 +9257,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // in the various CC lowering callbacks. Flags.setByVal(); } - if (Args[i].IsByVal || Args[i].IsInAlloca) { + if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) { PointerType *Ty = cast<PointerType>(Args[i].Ty); Type *ElementTy = Ty->getElementType(); @@ -9248,12 +9266,12 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setByValSize(FrameSize); // info is not there but there are cases it cannot get right. 
- unsigned FrameAlign; - if (Args[i].Alignment) - FrameAlign = Args[i].Alignment; + Align FrameAlign; + if (auto MA = Args[i].Alignment) + FrameAlign = *MA; else - FrameAlign = getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(Align(FrameAlign)); + FrameAlign = Align(getByValTypeAlignment(ElementTy, DL)); + Flags.setByValAlign(FrameAlign); } if (Args[i].IsNest) Flags.setNest(); @@ -9298,8 +9316,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setReturned(); } - getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, - CLI.CS.getInstruction(), CLI.CallConv, ExtendKind); + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, CLI.CB, + CLI.CallConv, ExtendKind); for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 @@ -9311,7 +9329,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { - MyFlags.Flags.setOrigAlign(Align::None()); + MyFlags.Flags.setOrigAlign(Align(1)); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } @@ -9376,6 +9394,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + Align HiddenSRetAlign = MF.getFrameInfo().getObjectAlign(DemoteStackIdx); for (unsigned i = 0; i < NumValues; ++i) { SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, CLI.DAG.getConstant(Offsets[i], CLI.DL, @@ -9384,7 +9404,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { RetTys[i], CLI.DL, CLI.Chain, Add, MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), DemoteStackIdx, Offsets[i]), - /* Alignment = */ 1); + HiddenSRetAlign); ReturnValues[i] = L; Chains[i] = L.getValue(1); } @@ -9551,7 +9571,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL, // initializes the alloca. Don't elide copies from the same argument twice. const Value *Val = SI->getValueOperand()->stripPointerCasts(); const auto *Arg = dyn_cast<Argument>(Val); - if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() || + if (!Arg || Arg->hasPassPointeeByValueAttr() || Arg->getType()->isEmptyTy() || DL.getTypeStoreSize(Arg->getType()) != DL.getTypeAllocSize(AI->getAllocatedType()) || @@ -9607,16 +9627,12 @@ static void tryToElideArgumentCopy( "object size\n"); return; } - unsigned RequiredAlignment = AI->getAlignment(); - if (!RequiredAlignment) { - RequiredAlignment = FuncInfo.MF->getDataLayout().getABITypeAlignment( - AI->getAllocatedType()); - } - if (MFI.getObjectAlignment(FixedIndex) < RequiredAlignment) { + Align RequiredAlignment = AI->getAlign(); + if (MFI.getObjectAlign(FixedIndex) < RequiredAlignment) { LLVM_DEBUG(dbgs() << " argument copy elision failed: alignment of alloca " "greater than stack argument alignment (" - << RequiredAlignment << " vs " - << MFI.getObjectAlignment(FixedIndex) << ")\n"); + << DebugStr(RequiredAlignment) << " vs " + << DebugStr(MFI.getObjectAlign(FixedIndex)) << ")\n"); return; } @@ -9653,6 +9669,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) { const DataLayout &DL = DAG.getDataLayout(); SmallVector<ISD::InputArg, 16> Ins; + // In Naked functions we aren't going to save any registers. + if (F.hasFnAttribute(Attribute::Naked)) + return; + if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. 
SmallVector<EVT, 1> ValueVTs; @@ -9741,12 +9761,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // in the various CC lowering callbacks. Flags.setByVal(); } + if (Arg.hasAttribute(Attribute::Preallocated)) { + Flags.setPreallocated(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // preallocated. This way we can know how many bytes we should've + // allocated and how many bytes a callee cleanup function will pop. If + // we port preallocated to more targets, we'll have to add custom + // preallocated handling in the various CC lowering callbacks. + Flags.setByVal(); + } if (F.getCallingConv() == CallingConv::X86_INTR) { // IA Interrupt passes frame (1st parameter) by value in the stack. if (ArgNo == 0) Flags.setByVal(); } - if (Flags.isByVal() || Flags.isInAlloca()) { + if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) { Type *ElementTy = Arg.getParamByValType(); // For ByVal, size and alignment should be passed from FE. BE will @@ -9786,7 +9815,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { - MyFlags.Flags.setOrigAlign(Align::None()); + MyFlags.Flags.setOrigAlign(Align(1)); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } @@ -9988,7 +10017,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } // Finally, if the target has anything special to do, allow it to do so. - EmitFunctionEntryCode(); + emitFunctionEntryCode(); } /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to @@ -10040,7 +10069,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { } Reg = RegOut; } else { - DenseMap<const Value *, unsigned>::iterator I = + DenseMap<const Value *, Register>::iterator I = FuncInfo.ValueMap.find(PHIOp); if (I != FuncInfo.ValueMap.end()) Reg = I->second; @@ -10654,6 +10683,19 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) { - SDValue N = getValue(I.getOperand(0)); - setValue(&I, N); + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), + ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + SmallVector<SDValue, 4> Values(NumValues); + SDValue Op = getValue(I.getOperand(0)); + + for (unsigned i = 0; i != NumValues; ++i) + Values[i] = DAG.getNode(ISD::FREEZE, getCurSDLoc(), ValueVTs[i], + SDValue(Op.getNode(), Op.getResNo() + i)); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ValueVTs), Values)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 18e0edf7fc047..f0b7fb0d52299 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -14,19 +14,16 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H #include "StatepointLowering.h" -#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/ValueTypes.h" -#include "llvm/IR/CallSite.h" #include 
"llvm/IR/DebugLoc.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Statepoint.h" @@ -55,7 +52,6 @@ class CatchSwitchInst; class CleanupPadInst; class CleanupReturnInst; class Constant; -class ConstantInt; class ConstrainedFPIntrinsic; class DbgValueInst; class DataLayout; @@ -77,6 +73,7 @@ class PHINode; class ResumeInst; class ReturnInst; class SDDbgValue; +class SelectionDAG; class StoreInst; class SwiftErrorValueTracking; class SwitchInst; @@ -409,6 +406,8 @@ public: SelectionDAGBuilder *SDB; }; + // Data related to deferred switch lowerings. Used to construct additional + // Basic Blocks in SelectionDAGISel::FinishBasicBlock. std::unique_ptr<SDAGSwitchLowering> SL; /// A StackProtectorDescriptor structure used to communicate stack protector @@ -518,7 +517,6 @@ public: void resolveOrClearDbgInfo(); SDValue getValue(const Value *V); - bool findValue(const Value *V) const; /// Return the SDNode for the specified IR value if it exists. SDNode *getNodeForIRValue(const Value *V) { @@ -557,7 +555,7 @@ public: bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); void CopyToExportRegsIfNeeded(const Value *V); void ExportFromCurrentBlock(const Value *V); - void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, + void LowerCallTo(const CallBase &CB, SDValue Callee, bool IsTailCall, const BasicBlock *EHPadBB = nullptr); // Lower range metadata from 0 to N to assert zext to an integer of nearest @@ -627,7 +625,7 @@ public: // This function is responsible for the whole statepoint lowering process. // It uniformly handles invoke and call statepoints. - void LowerStatepoint(ImmutableStatepoint ISP, + void LowerStatepoint(const GCStatepointInst &I, const BasicBlock *EHPadBB = nullptr); void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee, @@ -764,7 +762,7 @@ private: void visitStoreToSwiftError(const StoreInst &I); void visitFreeze(const FreezeInst &I); - void visitInlineAsm(ImmutableCallSite CS); + void visitInlineAsm(const CallBase &Call); void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); @@ -774,8 +772,7 @@ private: void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); void visitStackmap(const CallInst &I); - void visitPatchpoint(ImmutableCallSite CS, - const BasicBlock *EHPadBB = nullptr); + void visitPatchpoint(const CallBase &CB, const BasicBlock *EHPadBB = nullptr); // These two are implemented in StatepointLowering.cpp void visitGCRelocate(const GCRelocateInst &Relocate); @@ -795,7 +792,7 @@ private: void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); - void emitInlineAsmError(ImmutableCallSite CS, const Twine &Message); + void emitInlineAsmError(const CallBase &Call, const Twine &Message); /// If V is an function argument then create corresponding DBG_VALUE machine /// instruction for it now. 
At the end of instruction selection, they will be diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 6fd71393bf38c..42e3016e65b83 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -65,7 +65,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { if (G) if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo()) if (getMachineOpcode() < TII->getNumOpcodes()) - return TII->getName(getMachineOpcode()); + return std::string(TII->getName(getMachineOpcode())); return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; } if (G) { @@ -106,6 +106,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::TokenFactor: return "TokenFactor"; case ISD::AssertSext: return "AssertSext"; case ISD::AssertZext: return "AssertZext"; + case ISD::AssertAlign: return "AssertAlign"; case ISD::BasicBlock: return "BasicBlock"; case ISD::VALUETYPE: return "ValueType"; @@ -170,6 +171,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CopyToReg: return "CopyToReg"; case ISD::CopyFromReg: return "CopyFromReg"; case ISD::UNDEF: return "undef"; + case ISD::VSCALE: return "vscale"; case ISD::MERGE_VALUES: return "merge_values"; case ISD::INLINEASM: return "inlineasm"; case ISD::INLINEASM_BR: return "inlineasm_br"; @@ -210,6 +212,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FNEARBYINT: return "strict_fnearbyint"; case ISD::FROUND: return "fround"; case ISD::STRICT_FROUND: return "strict_fround"; + case ISD::FROUNDEVEN: return "froundeven"; + case ISD::STRICT_FROUNDEVEN: return "strict_froundeven"; case ISD::FEXP: return "fexp"; case ISD::STRICT_FEXP: return "strict_fexp"; case ISD::FEXP2: return "fexp2"; @@ -313,7 +317,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UMULFIXSAT: return "umulfixsat"; case ISD::SDIVFIX: return "sdivfix"; + case ISD::SDIVFIXSAT: return "sdivfixsat"; case ISD::UDIVFIX: return "udivfix"; + case ISD::UDIVFIXSAT: return "udivfixsat"; // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; @@ -341,7 +347,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; + case ISD::STRICT_FP16_TO_FP: return "strict_fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; + case ISD::STRICT_FP_TO_FP16: return "strict_fp_to_fp16"; case ISD::LROUND: return "lround"; case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; @@ -387,6 +395,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GC_TRANSITION_START: return "gc_transition.start"; case ISD::GC_TRANSITION_END: return "gc_transition.end"; case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; + case ISD::FREEZE: return "freeze"; + case ISD::PREALLOCATED_SETUP: + return "call_setup"; + case ISD::PREALLOCATED_ARG: + return "call_alloc"; // Bit manipulation case ISD::ABS: return "abs"; @@ -547,9 +560,6 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasAllowReassociation()) OS << " reassoc"; - if (getFlags().hasVectorReduction()) - OS << " vector-reduction"; - if (getFlags().hasNoFPExcept()) OS << " nofpexcept"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 6c57c72d47a7f..1f0432196a2d7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -215,6 +215,7 @@ namespace llvm { OptLevelChanger(SelectionDAGISel &ISel, CodeGenOpt::Level NewOptLevel) : IS(ISel) { SavedOptLevel = IS.OptLevel; + SavedFastISel = IS.TM.Options.EnableFastISel; if (NewOptLevel == SavedOptLevel) return; IS.OptLevel = NewOptLevel; @@ -223,7 +224,6 @@ namespace llvm { << IS.MF->getFunction().getName() << "\n"); LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" << NewOptLevel << "\n"); - SavedFastISel = IS.TM.Options.EnableFastISel; if (NewOptLevel == CodeGenOpt::None) { IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); LLVM_DEBUG( @@ -337,7 +337,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); + if (OptLevel != CodeGenOpt::None) + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -441,9 +442,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - auto *BFI = (PSI && PSI->hasProfileSummary()) ? - &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : - nullptr; + BlockFrequencyInfo *BFI = nullptr; + if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None) + BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); @@ -513,15 +514,15 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // registers. If we don't apply the reg fixups before, some registers may // appear as unused and will be skipped, resulting in bad MI. 
MachineRegisterInfo &MRI = MF->getRegInfo(); - for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(), + for (DenseMap<Register, Register>::iterator I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); I != E; ++I) { - unsigned From = I->first; - unsigned To = I->second; + Register From = I->first; + Register To = I->second; // If To is also scheduled to be replaced, find what its ultimate // replacement is. while (true) { - DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); + DenseMap<Register, Register>::iterator J = FuncInfo->RegFixups.find(To); if (J == E) break; To = J->second; @@ -622,7 +623,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Otherwise this is another use or second copy use. CopyUseMI = nullptr; break; } - if (CopyUseMI) { + if (CopyUseMI && + TRI.getRegSizeInBits(LDI->second, MRI) == + TRI.getRegSizeInBits(CopyUseMI->getOperand(0).getReg(), MRI)) { // Use MI's debug location, which describes where Variable was // declared, rather than whatever is attached to CopyUseMI. MachineInstr *NewMI = @@ -658,36 +661,6 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if floating point is used for msvc computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI()); - // Replace forward-declared registers with the registers containing - // the desired value. - for (DenseMap<unsigned, unsigned>::iterator - I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); - I != E; ++I) { - unsigned From = I->first; - unsigned To = I->second; - // If To is also scheduled to be replaced, find what its ultimate - // replacement is. - while (true) { - DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); - if (J == E) break; - To = J->second; - } - // Make sure the new register has a sufficiently constrained register class. - if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) - MRI.constrainRegClass(To, MRI.getRegClass(From)); - // Replace it. - - - // Replacing one register with another won't touch the kill flags. - // We need to conservatively clear the kill flags as a kill on the old - // register might dominate existing uses of the new register. - if (!MRI.use_empty(To)) - MRI.clearKillFlags(From); - MRI.replaceRegWith(From, To); - } - - TLI->finalizeLowering(*MF); - // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); @@ -1321,8 +1294,11 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { assert(DI->getVariable() && "Missing variable"); assert(DI->getDebugLoc() && "Missing location"); const Value *Address = DI->getAddress(); - if (!Address) + if (!Address) { + LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI + << " (bad address)\n"); continue; + } // Look through casts and constant offset GEPs. These mostly come from // inalloca. @@ -1347,6 +1323,8 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { if (Offset.getBoolValue()) Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, Offset.getZExtValue()); + LLVM_DEBUG(dbgs() << "processDbgDeclares: setVariableDbgInfo FI=" << FI + << ", " << *DI << "\n"); MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); } } @@ -1513,8 +1491,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // to keep track of gc-relocates for a particular gc-statepoint. This is // done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before // visitGCRelocate. 
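
The register-fixup walk at the top of this hunk, now typed over Register, resolves chains of scheduled replacements: if From maps to To and To is itself scheduled to be replaced, the walk keeps following the map until it reaches the final target. A standalone sketch of just that chain walk, using a plain std::map of integers in place of the DenseMap of Registers:

#include <cstdio>
#include <map>

// Follow To through the fixup map until it no longer appears as a key,
// mirroring the while(true) loop in the hunk above.
static unsigned resolveFixup(const std::map<unsigned, unsigned> &Fixups,
                             unsigned From) {
  unsigned To = Fixups.at(From);
  for (auto J = Fixups.find(To); J != Fixups.end(); J = Fixups.find(To))
    To = J->second;
  return To;
}

int main() {
  // %1 -> %2 and %2 -> %3, so uses of %1 must ultimately become %3.
  std::map<unsigned, unsigned> Fixups = {{1, 2}, {2, 3}};
  std::printf("1 resolves to %u\n", resolveFixup(Fixups, 1)); // prints 3
  return 0;
}
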
- if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) && - !isGCResult(Inst)) { + if (isa<CallInst>(Inst) && !isa<GCStatepointInst>(Inst) && + !isa<GCRelocateInst>(Inst) && !isa<GCResultInst>(Inst)) { OptimizationRemarkMissed R("sdagisel", "FastISelFailure", Inst->getDebugLoc(), LLVMBB); @@ -1532,7 +1510,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && !Inst->use_empty()) { - unsigned &R = FuncInfo->ValueMap[Inst]; + Register &R = FuncInfo->ValueMap[Inst]; if (!R) R = FuncInfo->CreateRegs(Inst); } @@ -2234,14 +2212,14 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, return !findNonImmUse(Root, N.getNode(), U, IgnoreChains); } -void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) { +void SelectionDAGISel::Select_INLINEASM(SDNode *N) { SDLoc DL(N); std::vector<SDValue> Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops, DL); const EVT VTs[] = {MVT::Other, MVT::Glue}; - SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM, DL, VTs, Ops); + SDValue New = CurDAG->getNode(N->getOpcode(), DL, VTs, Ops); New->setNodeId(-1); ReplaceUses(N, New.getNode()); CurDAG->RemoveDeadNode(N); @@ -2285,6 +2263,14 @@ void SelectionDAGISel::Select_UNDEF(SDNode *N) { CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); } +void SelectionDAGISel::Select_FREEZE(SDNode *N) { + // TODO: We don't have FREEZE pseudo-instruction in MachineInstr-level now. + // If FREEZE instruction is added later, the code below must be changed as + // well. + CurDAG->SelectNodeTo(N, TargetOpcode::COPY, N->getValueType(0), + N->getOperand(0)); +} + /// GetVBR - decode a vbr encoding whose top bit is set. LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2804,13 +2790,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, return; case ISD::AssertSext: case ISD::AssertZext: + case ISD::AssertAlign: ReplaceUses(SDValue(NodeToMatch, 0), NodeToMatch->getOperand(0)); CurDAG->RemoveDeadNode(NodeToMatch); return; case ISD::INLINEASM: case ISD::INLINEASM_BR: - Select_INLINEASM(NodeToMatch, - NodeToMatch->getOpcode() == ISD::INLINEASM_BR); + Select_INLINEASM(NodeToMatch); return; case ISD::READ_REGISTER: Select_READ_REGISTER(NodeToMatch); @@ -2821,6 +2807,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::UNDEF: Select_UNDEF(NodeToMatch); return; + case ISD::FREEZE: + Select_FREEZE(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); @@ -3693,12 +3682,11 @@ bool SelectionDAGISel::isOrEquivalentToAdd(const SDNode *N) const { // Detect when "or" is used to add an offset to a stack object. if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) { MachineFrameInfo &MFI = MF->getFrameInfo(); - unsigned A = MFI.getObjectAlignment(FN->getIndex()); - assert(isPowerOf2_32(A) && "Unexpected alignment"); + Align A = MFI.getObjectAlign(FN->getIndex()); int32_t Off = C->getSExtValue(); // If the alleged offset fits in the zero bits guaranteed by // the alignment, then this or is really an add. 
- return (Off >= 0) && (((A - 1) & Off) == unsigned(Off)); + return (Off >= 0) && (((A.value() - 1) & Off) == unsigned(Off)); } return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index cdc09d59f6a49..059a6baf967af 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -70,7 +70,7 @@ namespace llvm { } static std::string getGraphName(const SelectionDAG *G) { - return G->getMachineFunction().getName(); + return std::string(G->getMachineFunction().getName()); } static bool renderGraphFromBottomUp() { @@ -164,6 +164,20 @@ void SelectionDAG::viewGraph() { viewGraph(""); } +/// Just dump dot graph to a user-provided path and title. +/// This doesn't open the dot viewer program and +/// helps visualization when outside debugging session. +/// FileName expects absolute path. If provided +/// without any path separators then the file +/// will be created in the current directory. +/// Error will be emitted if the path is insane. +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void SelectionDAG::dumpDotGraph(const Twine &FileName, + const Twine &Title) { + dumpDotGraphToFile(this, FileName, Title); +} +#endif + /// clearGraphAttrs - Clear all previously defined node graph attributes. /// Intended to be used from a debugging tool (eg. gdb). void SelectionDAG::clearGraphAttrs() { diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index c628f379e415d..2cb57c1d1ccc8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -41,6 +42,7 @@ #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" @@ -61,6 +63,10 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); STATISTIC(StatepointMaxSlotsRequired, "Maximum number of stack slots required for a singe statepoint"); +cl::opt<bool> UseRegistersForDeoptValues( + "use-registers-for-deopt-values", cl::Hidden, cl::init(false), + cl::desc("Allow using registers for non pointer deopt args")); + static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, SelectionDAGBuilder &Builder, uint64_t Value) { SDLoc L = Builder.getCurSDLoc(); @@ -215,6 +221,28 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, return None; } + +/// Return true if-and-only-if the given SDValue can be lowered as either a +/// constant argument or a stack reference. The key point is that the value +/// doesn't need to be spilled or tracked as a vreg use. +static bool willLowerDirectly(SDValue Incoming) { + // We are making an unchecked assumption that the frame size <= 2^16 as that + // is the largest offset which can be encoded in the stackmap format. + if (isa<FrameIndexSDNode>(Incoming)) + return true; + + // The largest constant describeable in the StackMap format is 64 bits. 
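[Editor's aside, not part of the patch] The SelectionDAG::dumpDotGraph helper added above is a debugging convenience: unlike viewGraph it only writes the .dot file, so it works from non-interactive sessions or in bulk. A typical use in a debug or LLVM_ENABLE_DUMP build, from code or from a debugger where a SelectionDAG is in scope; the path and title here are made up:

  // e.g. temporarily inside a combine, or "call DAG.dumpDotGraph(...)" from gdb.
  DAG.dumpDotGraph("/tmp/isel-before-combine.dot", "before combine");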
+ // Potential Optimization: Constants values are sign extended by consumer, + // and thus there are many constants of static type > 64 bits whose value + // happens to be sext(Con64) and could thus be lowered directly. + if (Incoming.getValueType().getSizeInBits() > 64) + return false; + + return (isa<ConstantSDNode>(Incoming) || isa<ConstantFPSDNode>(Incoming) || + Incoming.isUndef()); +} + + /// Try to find existing copies of the incoming values in stack slots used for /// statepoint spilling. If we can find a spill slot for the incoming value, /// mark that slot as allocated, and reuse the same slot for this safepoint. @@ -224,11 +252,10 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, SelectionDAGBuilder &Builder) { SDValue Incoming = Builder.getValue(IncomingValue); - if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) { - // We won't need to spill this, so no need to check for previously - // allocated stack slots + // If we won't spill this, we don't need to check for previously allocated + // stack slots. + if (willLowerDirectly(Incoming)) return; - } SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming); if (OldLocation.getNode()) @@ -268,45 +295,6 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, Builder.StatepointLowering.setLocation(Incoming, Loc); } -/// Remove any duplicate (as SDValues) from the derived pointer pairs. This -/// is not required for correctness. It's purpose is to reduce the size of -/// StackMap section. It has no effect on the number of spill slots required -/// or the actual lowering. -static void -removeDuplicateGCPtrs(SmallVectorImpl<const Value *> &Bases, - SmallVectorImpl<const Value *> &Ptrs, - SmallVectorImpl<const GCRelocateInst *> &Relocs, - SelectionDAGBuilder &Builder, - FunctionLoweringInfo::StatepointSpillMap &SSM) { - DenseMap<SDValue, const Value *> Seen; - - SmallVector<const Value *, 64> NewBases, NewPtrs; - SmallVector<const GCRelocateInst *, 64> NewRelocs; - for (size_t i = 0, e = Ptrs.size(); i < e; i++) { - SDValue SD = Builder.getValue(Ptrs[i]); - auto SeenIt = Seen.find(SD); - - if (SeenIt == Seen.end()) { - // Only add non-duplicates - NewBases.push_back(Bases[i]); - NewPtrs.push_back(Ptrs[i]); - NewRelocs.push_back(Relocs[i]); - Seen[SD] = Ptrs[i]; - } else { - // Duplicate pointer found, note in SSM and move on: - SSM.DuplicateMap[Ptrs[i]] = SeenIt->second; - } - } - assert(Bases.size() >= NewBases.size()); - assert(Ptrs.size() >= NewPtrs.size()); - assert(Relocs.size() >= NewRelocs.size()); - Bases = NewBases; - Ptrs = NewPtrs; - Relocs = NewRelocs; - assert(Ptrs.size() == Bases.size()); - assert(Ptrs.size() == Relocs.size()); -} - /// Extract call from statepoint, lower it and return pointer to the /// call node. Also update NodeMap so that getValue(statepoint) will /// reference lowered call result @@ -353,9 +341,9 @@ static MachineMemOperand* getMachineMemOperand(MachineFunction &MF, auto MMOFlags = MachineMemOperand::MOStore | MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile; auto &MFI = MF.getFrameInfo(); - return MF.getMachineMemOperand(PtrInfo, MMOFlags, + return MF.getMachineMemOperand(PtrInfo, MMOFlags, MFI.getObjectSize(FI.getIndex()), - MFI.getObjectAlignment(FI.getIndex())); + MFI.getObjectAlign(FI.getIndex())); } /// Spill a value incoming to the statepoint. 
It might be either part of @@ -393,10 +381,9 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // slots with preferred alignments larger than frame alignment.. auto &MF = Builder.DAG.getMachineFunction(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); - auto *StoreMMO = - MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, - MFI.getObjectSize(Index), - MFI.getObjectAlignment(Index)); + auto *StoreMMO = MF.getMachineMemOperand( + PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(Index), + MFI.getObjectAlign(Index)); Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, StoreMMO); @@ -412,59 +399,81 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, /// Lower a single value incoming to a statepoint node. This value can be /// either a deopt value or a gc value, the handling is the same. We special /// case constants and allocas, then fall back to spilling if required. -static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly, - SmallVectorImpl<SDValue> &Ops, - SmallVectorImpl<MachineMemOperand*> &MemRefs, - SelectionDAGBuilder &Builder) { - // Note: We know all of these spills are independent, but don't bother to - // exploit that chain wise. DAGCombine will happily do so as needed, so - // doing it here would be a small compile time win at most. - SDValue Chain = Builder.getRoot(); - - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) { +static void +lowerIncomingStatepointValue(SDValue Incoming, bool RequireSpillSlot, + SmallVectorImpl<SDValue> &Ops, + SmallVectorImpl<MachineMemOperand *> &MemRefs, + SelectionDAGBuilder &Builder) { + + if (willLowerDirectly(Incoming)) { + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?) + assert(Incoming.getValueType() == Builder.getFrameIndexTy() && + "Incoming value is a frame index!"); + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Builder.getFrameIndexTy())); + + auto &MF = Builder.DAG.getMachineFunction(); + auto *MMO = getMachineMemOperand(MF, *FI); + MemRefs.push_back(MMO); + return; + } + + assert(Incoming.getValueType().getSizeInBits() <= 64); + + if (Incoming.isUndef()) { + // Put an easily recognized constant that's unlikely to be a valid + // value so that uses of undef by the consumer of the stackmap is + // easily recognized. This is legal since the compiler is always + // allowed to chose an arbitrary value for undef. + pushStackMapConstant(Ops, Builder, 0xFEFEFEFE); + return; + } + // If the original value was a constant, make sure it gets recorded as // such in the stackmap. This is required so that the consumer can // parse any internal format to the deopt state. It also handles null - // pointers and other constant pointers in GC states. Note the constant - // vectors do not appear to actually hit this path and that anything larger - // than an i64 value (not type!) will fail asserts here. - pushStackMapConstant(Ops, Builder, C->getSExtValue()); - } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { - // This handles allocas as arguments to the statepoint (this is only - // really meaningful for a deopt value. For GC, we'd be trying to - // relocate the address of the alloca itself?) 
- assert(Incoming.getValueType() == Builder.getFrameIndexTy() && - "Incoming value is a frame index!"); - Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), - Builder.getFrameIndexTy())); + // pointers and other constant pointers in GC states. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) { + pushStackMapConstant(Ops, Builder, C->getSExtValue()); + return; + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Incoming)) { + pushStackMapConstant(Ops, Builder, + C->getValueAPF().bitcastToAPInt().getZExtValue()); + return; + } - auto &MF = Builder.DAG.getMachineFunction(); - auto *MMO = getMachineMemOperand(MF, *FI); - MemRefs.push_back(MMO); - - } else if (LiveInOnly) { + llvm_unreachable("unhandled direct lowering case"); + } + + + + if (!RequireSpillSlot) { // If this value is live in (not live-on-return, or live-through), we can // treat it the same way patchpoint treats it's "live in" values. We'll // end up folding some of these into stack references, but they'll be // handled by the register allocator. Note that we do not have the notion // of a late use so these values might be placed in registers which are - // clobbered by the call. This is fine for live-in. + // clobbered by the call. This is fine for live-in. For live-through + // fix-up pass should be executed to force spilling of such registers. Ops.push_back(Incoming); } else { - // Otherwise, locate a spill slot and explicitly spill it so it - // can be found by the runtime later. We currently do not support - // tracking values through callee saved registers to their eventual - // spill location. This would be a useful optimization, but would - // need to be optional since it requires a lot of complexity on the - // runtime side which not all would support. + // Otherwise, locate a spill slot and explicitly spill it so it can be + // found by the runtime later. Note: We know all of these spills are + // independent, but don't bother to exploit that chain wise. DAGCombine + // will happily do so as needed, so doing it here would be a small compile + // time win at most. + SDValue Chain = Builder.getRoot(); auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder); Ops.push_back(std::get<0>(Res)); if (auto *MMO = std::get<2>(Res)) MemRefs.push_back(MMO); Chain = std::get<1>(Res);; + Builder.DAG.setRoot(Chain); } - Builder.DAG.setRoot(Chain); } /// Lower deopt state and gc pointer arguments of the statepoint. The actual @@ -522,8 +531,18 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, const bool LiveInDeopt = SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn; - auto isGCValue =[&](const Value *V) { - return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V); + auto isGCValue = [&](const Value *V) { + auto *Ty = V->getType(); + if (!Ty->isPtrOrPtrVectorTy()) + return false; + if (auto *GFI = Builder.GFI) + if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) + return *IsManaged; + return true; // conservative + }; + + auto requireSpillSlot = [&](const Value *V) { + return !(LiveInDeopt || UseRegistersForDeoptValues) || isGCValue(V); }; // Before we actually start lowering (and allocating spill slots for values), @@ -532,7 +551,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // doesn't change semantics at all. It is important for performance that we // reserve slots for both deopt and gc values before lowering either. 
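[Editor's aside, not part of the patch] The rewritten isGCValue above defers to the GC strategy's isGCManagedPointer for pointer types, and requireSpillSlot combines that with the DeoptLiveIn flag and the new -use-registers-for-deopt-values option. The same logic pulled out of the lambdas for readability (editor's sketch; assumes the surrounding LLVM statepoint-lowering context, requireSpillSlot here is a free function only for illustration):

  // GC pointers always get a stack slot (the stackmap must locate them);
  // deopt values may stay in registers when the statepoint is DeoptLiveIn
  // or when -use-registers-for-deopt-values is enabled.
  static bool requireSpillSlot(const llvm::Value *V, bool LiveInDeopt,
                               bool UseRegistersForDeoptValues,
                               llvm::GCFunctionInfo *GFI) {
    auto IsGCValue = [&](const llvm::Value *V) {
      llvm::Type *Ty = V->getType();
      if (!Ty->isPtrOrPtrVectorTy())
        return false;                       // non-pointers are never GC-managed
      if (GFI)
        if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty))
          return *IsManaged;
      return true;                          // conservatively assume managed
    };
    return !(LiveInDeopt || UseRegistersForDeoptValues) || IsGCValue(V);
  }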
for (const Value *V : SI.DeoptState) { - if (!LiveInDeopt || isGCValue(V)) + if (requireSpillSlot(V)) reservePreviousStackSlotForValue(V, Builder); } for (unsigned i = 0; i < SI.Bases.size(); ++i) { @@ -559,8 +578,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, } if (!Incoming.getNode()) Incoming = Builder.getValue(V); - const bool LiveInValue = LiveInDeopt && !isGCValue(V); - lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder); + lowerIncomingStatepointValue(Incoming, requireSpillSlot(V), Ops, MemRefs, + Builder); } // Finally, go ahead and lower all the gc arguments. There's no prefixed @@ -570,12 +589,14 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // (base[0], ptr[0], base[1], ptr[1], ...) for (unsigned i = 0; i < SI.Bases.size(); ++i) { const Value *Base = SI.Bases[i]; - lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false, - Ops, MemRefs, Builder); + lowerIncomingStatepointValue(Builder.getValue(Base), + /*RequireSpillSlot*/ true, Ops, MemRefs, + Builder); const Value *Ptr = SI.Ptrs[i]; - lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false, - Ops, MemRefs, Builder); + lowerIncomingStatepointValue(Builder.getValue(Ptr), + /*RequireSpillSlot*/ true, Ops, MemRefs, + Builder); } // If there are any explicit spill slots passed to the statepoint, record @@ -610,7 +631,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SDValue Loc = Builder.StatepointLowering.getLocation(SDV); if (Loc.getNode()) { - SpillMap.SlotMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); + SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex(); } else { // Record value as visited, but not spilled. This is case for allocas // and constants. For this values we can avoid emitting spill load while @@ -618,7 +639,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // Actually we do not need to record them in this map at all. // We do this only to check that we are not relocating any unvisited // value. - SpillMap.SlotMap[V] = None; + SpillMap[V] = None; // Default llvm mechanisms for exporting values which are used in // different basic blocks does not work for gc relocates. @@ -641,24 +662,15 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); + assert(SI.Bases.size() == SI.Ptrs.size() && + SI.Ptrs.size() <= SI.GCRelocates.size()); #ifndef NDEBUG - // We schedule gc relocates before removeDuplicateGCPtrs since we _will_ - // encounter the duplicate gc relocates we elide in removeDuplicateGCPtrs. for (auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) StatepointLowering.scheduleRelocCall(*Reloc); #endif - // Remove any redundant llvm::Values which map to the same SDValue as another - // input. Also has the effect of removing duplicates in the original - // llvm::Value input list as well. This is a useful optimization for - // reducing the size of the StackMap section. It has no other impact. 
- removeDuplicateGCPtrs(SI.Bases, SI.Ptrs, SI.GCRelocates, *this, - FuncInfo.StatepointSpillMaps[SI.StatepointInstr]); - assert(SI.Bases.size() == SI.Ptrs.size() && - SI.Ptrs.size() == SI.GCRelocates.size()); - // Lower statepoint vmstate and gcstate arguments SmallVector<SDValue, 10> LoweredMetaArgs; SmallVector<MachineMemOperand*, 16> MemRefs; @@ -830,97 +842,109 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( } void -SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP, +SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, const BasicBlock *EHPadBB /*= nullptr*/) { - assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg && + assert(I.getCallingConv() != CallingConv::AnyReg && "anyregcc is not supported on statepoints!"); #ifndef NDEBUG - // If this is a malformed statepoint, report it early to simplify debugging. - // This should catch any IR level mistake that's made when constructing or - // transforming statepoints. - ISP.verify(); - // Check that the associated GCStrategy expects to encounter statepoints. assert(GFI->getStrategy().useStatepoints() && "GCStrategy does not expect to encounter statepoints"); #endif SDValue ActualCallee; + SDValue Callee = getValue(I.getActualCalledOperand()); - if (ISP.getNumPatchBytes() > 0) { + if (I.getNumPatchBytes() > 0) { // If we've been asked to emit a nop sequence instead of a call instruction // for this statepoint then don't lower the call target, but use a constant - // `null` instead. Not lowering the call target lets statepoint clients get - // away without providing a physical address for the symbolic call target at - // link time. - - const auto &TLI = DAG.getTargetLoweringInfo(); - const auto &DL = DAG.getDataLayout(); - - unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); - ActualCallee = DAG.getConstant(0, getCurSDLoc(), TLI.getPointerTy(DL, AS)); + // `undef` instead. Not lowering the call target lets statepoint clients + // get away without providing a physical address for the symbolic call + // target at link time. + ActualCallee = DAG.getUNDEF(Callee.getValueType()); } else { - ActualCallee = getValue(ISP.getCalledValue()); + ActualCallee = Callee; } StatepointLoweringInfo SI(DAG); - populateCallLoweringInfo(SI.CLI, ISP.getCall(), - ImmutableStatepoint::CallArgsBeginPos, - ISP.getNumCallArgs(), ActualCallee, - ISP.getActualReturnType(), false /* IsPatchPoint */); - - for (const GCRelocateInst *Relocate : ISP.getRelocates()) { + populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos, + I.getNumCallArgs(), ActualCallee, + I.getActualReturnType(), false /* IsPatchPoint */); + + // There may be duplication in the gc.relocate list; such as two copies of + // each relocation on normal and exceptional path for an invoke. We only + // need to spill once and record one copy in the stackmap, but we need to + // reload once per gc.relocate. (Dedupping gc.relocates is trickier and best + // handled as a CSE problem elsewhere.) + // TODO: There a couple of major stackmap size optimizations we could do + // here if we wished. + // 1) If we've encountered a derived pair {B, D}, we don't need to actually + // record {B,B} if it's seen later. + // 2) Due to rematerialization, actual derived pointers are somewhat rare; + // given that, we could change the format to record base pointer relocations + // separately with half the space. This would require a format rev and a + // fairly major rework of the STATEPOINT node though. 
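[Editor's aside, not part of the patch] As the TODO above notes, an invoke typically carries two gc.relocates per pointer (one on the normal and one on the exceptional path), but the STATEPOINT only needs one (base, derived) record per distinct lowered value. The next chunk implements this with a SmallSet keyed on the derived pointer's SDValue; the pattern in isolation (editor's sketch of the same loop, assuming the SelectionDAGBuilder context shown):

  // Record every relocate so each one gets its reload, but only emit one
  // (base, derived) pair per distinct lowered derived-pointer SDValue.
  llvm::SmallSet<llvm::SDValue, 8> Seen;
  for (const llvm::GCRelocateInst *Relocate : I.getGCRelocates()) {
    SI.GCRelocates.push_back(Relocate);                // reload once per relocate
    llvm::SDValue DerivedSD = getValue(Relocate->getDerivedPtr());
    if (Seen.insert(DerivedSD).second) {               // first time we see it
      SI.Bases.push_back(Relocate->getBasePtr());      // one stackmap record
      SI.Ptrs.push_back(Relocate->getDerivedPtr());
    }
  }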
+ SmallSet<SDValue, 8> Seen; + for (const GCRelocateInst *Relocate : I.getGCRelocates()) { SI.GCRelocates.push_back(Relocate); - SI.Bases.push_back(Relocate->getBasePtr()); - SI.Ptrs.push_back(Relocate->getDerivedPtr()); + + SDValue DerivedSD = getValue(Relocate->getDerivedPtr()); + if (Seen.insert(DerivedSD).second) { + SI.Bases.push_back(Relocate->getBasePtr()); + SI.Ptrs.push_back(Relocate->getDerivedPtr()); + } } - SI.GCArgs = ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); - SI.StatepointInstr = ISP.getInstruction(); - SI.GCTransitionArgs = - ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end()); - SI.ID = ISP.getID(); - SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end()); - SI.StatepointFlags = ISP.getFlags(); - SI.NumPatchBytes = ISP.getNumPatchBytes(); + SI.GCArgs = ArrayRef<const Use>(I.gc_args_begin(), I.gc_args_end()); + SI.StatepointInstr = &I; + SI.ID = I.getID(); + + SI.DeoptState = ArrayRef<const Use>(I.deopt_begin(), I.deopt_end()); + SI.GCTransitionArgs = ArrayRef<const Use>(I.gc_transition_args_begin(), + I.gc_transition_args_end()); + + SI.StatepointFlags = I.getFlags(); + SI.NumPatchBytes = I.getNumPatchBytes(); SI.EHPadBB = EHPadBB; SDValue ReturnValue = LowerAsSTATEPOINT(SI); // Export the result value if needed - const GCResultInst *GCResult = ISP.getGCResult(); - Type *RetTy = ISP.getActualReturnType(); - if (!RetTy->isVoidTy() && GCResult) { - if (GCResult->getParent() != ISP.getCall()->getParent()) { - // Result value will be used in a different basic block so we need to - // export it now. Default exporting mechanism will not work here because - // statepoint call has a different type than the actual call. It means - // that by default llvm will create export register of the wrong type - // (always i32 in our case). So instead we need to create export register - // with correct type manually. - // TODO: To eliminate this problem we can remove gc.result intrinsics - // completely and make statepoint call to return a tuple. - unsigned Reg = FuncInfo.CreateRegs(RetTy); - RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy, - ISP.getCall()->getCallingConv()); - SDValue Chain = DAG.getEntryNode(); - - RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); - PendingExports.push_back(Chain); - FuncInfo.ValueMap[ISP.getInstruction()] = Reg; - } else { - // Result value will be used in a same basic block. Don't export it or - // perform any explicit register copies. - // We'll replace the actuall call node shortly. gc_result will grab - // this value. - setValue(ISP.getInstruction(), ReturnValue); - } - } else { - // The token value is never used from here on, just generate a poison value - setValue(ISP.getInstruction(), DAG.getIntPtrConstant(-1, getCurSDLoc())); + const GCResultInst *GCResult = I.getGCResult(); + Type *RetTy = I.getActualReturnType(); + + if (RetTy->isVoidTy() || !GCResult) { + // The return value is not needed, just generate a poison value. + setValue(&I, DAG.getIntPtrConstant(-1, getCurSDLoc())); + return; + } + + if (GCResult->getParent() == I.getParent()) { + // Result value will be used in a same basic block. Don't export it or + // perform any explicit register copies. The gc_result will simply grab + // this value. + setValue(&I, ReturnValue); + return; } + + // Result value will be used in a different basic block so we need to export + // it now. 
Default exporting mechanism will not work here because statepoint + // call has a different type than the actual call. It means that by default + // llvm will create export register of the wrong type (always i32 in our + // case). So instead we need to create export register with correct type + // manually. + // TODO: To eliminate this problem we can remove gc.result intrinsics + // completely and make statepoint call to return a tuple. + unsigned Reg = FuncInfo.CreateRegs(RetTy); + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), Reg, RetTy, + I.getCallingConv()); + SDValue Chain = DAG.getEntryNode(); + + RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr); + PendingExports.push_back(Chain); + FuncInfo.ValueMap[&I] = Reg; } void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( @@ -966,26 +990,23 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. - const Instruction *I = CI.getStatepoint(); - - if (I->getParent() != CI.getParent()) { - // Statepoint is in different basic block so we should have stored call - // result in a virtual register. - // We can not use default getValue() functionality to copy value from this - // register because statepoint and actual call return types can be - // different, and getValue() will use CopyFromReg of the wrong type, - // which is always i32 in our case. - PointerType *CalleeType = cast<PointerType>( - ImmutableStatepoint(I).getCalledValue()->getType()); - Type *RetTy = - cast<FunctionType>(CalleeType->getElementType())->getReturnType(); - SDValue CopyFromReg = getCopyFromRegs(I, RetTy); - - assert(CopyFromReg.getNode()); - setValue(&CI, CopyFromReg); - } else { - setValue(&CI, getValue(I)); + const GCStatepointInst *SI = CI.getStatepoint(); + + if (SI->getParent() == CI.getParent()) { + setValue(&CI, getValue(SI)); + return; } + // Statepoint is in different basic block so we should have stored call + // result in a virtual register. + // We can not use default getValue() functionality to copy value from this + // register because statepoint and actual call return types can be + // different, and getValue() will use CopyFromReg of the wrong type, + // which is always i32 in our case. + Type *RetTy = SI->getActualReturnType(); + SDValue CopyFromReg = getCopyFromRegs(SI, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); } void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { @@ -1005,6 +1026,13 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { const Value *DerivedPtr = Relocate.getDerivedPtr(); SDValue SD = getValue(DerivedPtr); + if (SD.isUndef() && SD.getValueType().getSizeInBits() <= 64) { + // Lowering relocate(undef) as arbitrary constant. Current constant value + // is chosen such that it's unlikely to be a valid pointer. 
+ setValue(&Relocate, DAG.getTargetConstant(0xFEFEFEFE, SDLoc(SD), MVT::i64)); + return; + } + auto &SpillMap = FuncInfo.StatepointSpillMaps[Relocate.getStatepoint()]; auto SlotIt = SpillMap.find(DerivedPtr); assert(SlotIt != SpillMap.end() && "Relocating not lowered gc value"); @@ -1020,26 +1048,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { unsigned Index = *DerivedPtrLocation; SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); - // Note: We know all of these reloads are independent, but don't bother to - // exploit that chain wise. DAGCombine will happily do so as needed, so - // doing it here would be a small compile time win at most. - SDValue Chain = getRoot(); + // All the reloads are independent and are reading memory only modified by + // statepoints (i.e. no other aliasing stores); informing SelectionDAG of + // this this let's CSE kick in for free and allows reordering of instructions + // if possible. The lowering for statepoint sets the root, so this is + // ordering all reloads with the either a) the statepoint node itself, or b) + // the entry of the current block for an invoke statepoint. + const SDValue Chain = DAG.getRoot(); // != Builder.getRoot() auto &MF = DAG.getMachineFunction(); auto &MFI = MF.getFrameInfo(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); - auto *LoadMMO = - MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, - MFI.getObjectSize(Index), - MFI.getObjectAlignment(Index)); + auto *LoadMMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlign(Index)); auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), Relocate.getType()); SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, SpillSlot, LoadMMO); - - DAG.setRoot(SpillLoad.getValue(1)); + PendingLoads.push_back(SpillLoad.getValue(1)); assert(SpillLoad.getNode()); setValue(&Relocate, SpillLoad); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h index 70507932681d0..634ef87f3840e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -15,11 +15,9 @@ #define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/ValueTypes.h" #include <cassert> namespace llvm { diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 24ab65171a179..96df20039b15d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -83,7 +83,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, const CCValAssign &ArgLoc = ArgLocs[I]; if (!ArgLoc.isRegLoc()) continue; - Register Reg = ArgLoc.getLocReg(); + MCRegister Reg = ArgLoc.getLocReg(); // Only look at callee saved registers. 
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) continue; @@ -93,7 +93,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, SDValue Value = OutVals[I]; if (Value->getOpcode() != ISD::CopyFromReg) return false; - unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); + MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); if (MRI.getLiveInPhysReg(ArgReg) != Reg) return false; } @@ -110,14 +110,18 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet); IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest); IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal); + IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated); IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca); IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned); IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf); IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); - Alignment = Call->getParamAlignment(ArgIdx); + Alignment = Call->getParamAlign(ArgIdx); ByValType = nullptr; - if (Call->paramHasAttr(ArgIdx, Attribute::ByVal)) + if (IsByVal) ByValType = Call->getParamByValType(ArgIdx); + PreallocatedType = nullptr; + if (IsPreallocated) + PreallocatedType = Call->getParamPreallocatedType(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -176,38 +180,24 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, return LowerCallTo(CLI); } -bool -TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, - unsigned Limit, uint64_t Size, - unsigned DstAlign, unsigned SrcAlign, - bool IsMemset, - bool ZeroMemset, - bool MemcpyStrSrc, - bool AllowOverlap, - unsigned DstAS, unsigned SrcAS, - const AttributeList &FuncAttributes) const { - // If 'SrcAlign' is zero, that means the memory operation does not need to - // load the value, i.e. memset or memcpy from constant string. Otherwise, - // it's the inferred alignment of the source. 'DstAlign', on the other hand, - // is the specified alignment of the memory operation. If it is zero, that - // means it's possible to change the alignment of the destination. - // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does - // not need to be loaded. - if (!(SrcAlign == 0 || SrcAlign >= DstAlign)) +bool TargetLowering::findOptimalMemOpLowering( + std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, + unsigned SrcAS, const AttributeList &FuncAttributes) const { + if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) return false; - EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign, - IsMemset, ZeroMemset, MemcpyStrSrc, - FuncAttributes); + EVT VT = getOptimalMemOpType(Op, FuncAttributes); if (VT == MVT::Other) { // Use the largest integer type whose alignment constraints are satisfied. // We only need to check DstAlign here as SrcAlign is always greater or // equal to DstAlign (or zero). VT = MVT::i64; - while (DstAlign && DstAlign < VT.getSizeInBits() / 8 && - !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); + if (Op.isFixedDstAlign()) + while ( + Op.getDstAlign() < (VT.getSizeInBits() / 8) && + !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign().value())) + VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); assert(VT.isInteger()); // Find the largest legal integer type. 
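[Editor's aside, not part of the patch] findOptimalMemOpLowering now receives a single MemOp descriptor instead of the long flag list, but the covering strategy is unchanged: pick the widest type whose alignment constraints are satisfied, then repeatedly cover the remaining bytes, narrowing (or overlapping with an unaligned access) for the tail. A self-contained illustration of the greedy covering, deliberately independent of the LLVM types; pickMemOpWidths is a made-up helper:

  #include <cstdint>
  #include <cstdio>
  #include <vector>

  // Split `Size` bytes into access widths, starting from the widest
  // supported power-of-two width and narrowing for the tail.  The real code
  // additionally checks legality/alignment and may prefer one overlapping
  // unaligned access over several narrow ones.
  static std::vector<unsigned> pickMemOpWidths(uint64_t Size, unsigned MaxWidth) {
    std::vector<unsigned> Widths;
    unsigned W = MaxWidth;
    while (Size != 0) {
      while (W > Size)
        W /= 2;              // narrow until the piece fits
      Widths.push_back(W);
      Size -= W;
    }
    return Widths;
  }

  int main() {
    // A 15-byte memset with 8-byte stores available: 8 + 4 + 2 + 1.
    for (unsigned W : pickMemOpWidths(15, 8))
      std::printf("%u ", W);
    std::printf("\n");
    return 0;
  }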
@@ -223,7 +213,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, } unsigned NumMemOps = 0; - while (Size != 0) { + uint64_t Size = Op.size(); + while (Size) { unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { // For now, only use non-vector load / store's for the left-over pieces. @@ -257,9 +248,10 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. bool Fast; - if (NumMemOps && AllowOverlap && NewVTSize < Size && - allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, - MachineMemOperand::MONone, &Fast) && + if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && + allowsMisalignedMemoryAccesses( + VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0, + MachineMemOperand::MONone, &Fast) && Fast) VTSize = Size; else { @@ -491,13 +483,15 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// If the specified instruction has a constant integer operand and there are /// bits set in that constant that are not demanded, then clear those bits and /// return true. -bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, + const APInt &DemandedBits, + const APInt &DemandedElts, TargetLoweringOpt &TLO) const { SDLoc DL(Op); unsigned Opcode = Op.getOpcode(); // Do target-specific constant optimization. - if (targetShrinkDemandedConstant(Op, Demanded, TLO)) + if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return TLO.New.getNode(); // FIXME: ISD::SELECT, ISD::SELECT_CC @@ -513,12 +507,12 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, // If this is a 'not' op, don't touch it because that's a canonical form. const APInt &C = Op1C->getAPIntValue(); - if (Opcode == ISD::XOR && Demanded.isSubsetOf(C)) + if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C)) return false; - if (!C.isSubsetOf(Demanded)) { + if (!C.isSubsetOf(DemandedBits)) { EVT VT = Op.getValueType(); - SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT); + SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT); SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC); return TLO.CombineTo(Op, NewOp); } @@ -530,6 +524,16 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, return false; } +bool TargetLowering::ShrinkDemandedConstant(SDValue Op, + const APInt &DemandedBits, + TargetLoweringOpt &TLO) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO); +} + /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be /// generalized for targets with other types of implicit widening casts. @@ -598,6 +602,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, unsigned Depth, bool AssumeSingleUse) const { EVT VT = Op.getValueType(); + + // TODO: We can probably do more work on calculating the known bits and + // simplifying the operations for scalable vectors, but for now we just + // bail out. + if (VT.isScalableVector()) { + // Pretend we don't know anything for now. 
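[Editor's aside, not part of the patch] ShrinkDemandedConstant now also takes DemandedElts, with a scalar wrapper that simply demands all lanes. The transform itself just masks a constant operand down to the demanded bits when that actually changes it (and leaves an all-ones XOR constant alone, since that is a canonical `not`). A worked example of the masking rule:

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint32_t C = 0x0000FF00;            // constant operand of an AND
    const uint32_t DemandedBits = 0x0000F000; // only bits 15..12 are used
    // The constant has bits set outside the demanded set, so it can shrink.
    assert((C & ~DemandedBits) != 0);
    const uint32_t NewC = C & DemandedBits;   // 0x0000F000
    // On the demanded bits, (X & C) and (X & NewC) agree for every X.
    for (uint32_t X : {0u, 0xFFFFFFFFu, 0x12345678u, 0x0000A5A5u})
      assert(((X & C) & DemandedBits) == ((X & NewC) & DemandedBits));
    return 0;
  }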
+ Known = KnownBits(DemandedBits.getBitWidth()); + return false; + } + APInt DemandedElts = VT.isVector() ? APInt::getAllOnesValue(VT.getVectorNumElements()) : APInt(1, 1); @@ -623,15 +637,18 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return DAG.getUNDEF(Op.getValueType()); unsigned NumElts = DemandedElts.getBitWidth(); + unsigned BitWidth = DemandedBits.getBitWidth(); KnownBits LHSKnown, RHSKnown; switch (Op.getOpcode()) { case ISD::BITCAST: { SDValue Src = peekThroughBitcasts(Op.getOperand(0)); EVT SrcVT = Src.getValueType(); EVT DstVT = Op.getValueType(); + if (SrcVT == DstVT) + return Src; + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); unsigned NumDstEltBits = DstVT.getScalarSizeInBits(); - if (NumSrcEltBits == NumDstEltBits) if (SDValue V = SimplifyMultipleUseDemandedBits( Src, DemandedBits, DemandedElts, DAG, Depth + 1)) @@ -719,6 +736,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return Op.getOperand(1); break; } + case ISD::SHL: { + // If we are only demanding sign bits then we can use the shift source + // directly. + if (const APInt *MaxSA = + DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + SDValue Op0 = Op.getOperand(0); + unsigned ShAmt = MaxSA->getZExtValue(); + unsigned NumSignBits = + DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); + unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); + if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits)) + return Op0; + } + break; + } case ISD::SETCC: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -727,7 +759,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. if (DemandedBits.isSignMask() && - Op0.getScalarValueSizeInBits() == DemandedBits.getBitWidth() && + Op0.getScalarValueSizeInBits() == BitWidth && getBooleanContents(Op0.getValueType()) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! @@ -742,9 +774,30 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( } case ISD::SIGN_EXTEND_INREG: { // If none of the extended bits are demanded, eliminate the sextinreg. + SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits()) - return Op.getOperand(0); + unsigned ExBits = ExVT.getScalarSizeInBits(); + if (DemandedBits.getActiveBits() <= ExBits) + return Op0; + // If the input is already sign extended, just drop the extension. + unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); + if (NumSignBits >= (BitWidth - ExBits + 1)) + return Op0; + break; + } + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: { + // If we only want the lowest element and none of extended bits, then we can + // return the bitcasted source vector. 
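[Editor's aside, not part of the patch] The new ISD::SHL case in SimplifyMultipleUseDemandedBits bypasses the shift when only sign bits are demanded and the source has more sign bits than the shift amount; under those conditions every demanded bit of both the source and the shifted value equals the (shared) sign bit. A concrete check of that identity:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Take x = sext(i8) as i32 (at least 25 sign bits) and a left shift by 8.
    // With only the sign bit demanded, (x << 8) and x agree on it, so the
    // shift source can be used directly.
    for (int32_t Small = -128; Small <= 127; ++Small) {
      int32_t X = Small;                    // sign-extended i8 value
      uint32_t Shifted = uint32_t(X) << 8;  // sign is preserved by the shift
      assert((Shifted >> 31) == (uint32_t(X) >> 31));
    }
    return 0;
  }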
+ SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() && + DAG.getDataLayout().isLittleEndian() && + DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) { + return DAG.getBitcast(DstVT, Src); + } break; } case ISD::INSERT_VECTOR_ELT: { @@ -757,6 +810,16 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return Vec; break; } + case ISD::INSERT_SUBVECTOR: { + // If we don't demand the inserted subvector, return the base vector. + SDValue Vec = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + uint64_t Idx = Op.getConstantOperandVal(2); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + if (DemandedElts.extractBits(NumSubElts, Idx) == 0) + return Vec; + break; + } case ISD::VECTOR_SHUFFLE: { ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); @@ -790,6 +853,25 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( return SDValue(); } +SDValue TargetLowering::SimplifyMultipleUseDemandedBits( + SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG, + unsigned Depth) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, + Depth); +} + +SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( + SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, + unsigned Depth) const { + APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits()); + return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, + Depth); +} + /// Look at Op. At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -805,6 +887,15 @@ bool TargetLowering::SimplifyDemandedBits( assert(Op.getScalarValueSizeInBits() == BitWidth && "Mask size mismatches value type size!"); + // Don't know anything. + Known = KnownBits(BitWidth); + + // TODO: We can probably do more work on calculating the known bits and + // simplifying the operations for scalable vectors, but for now we just + // bail out. + if (Op.getValueType().isScalableVector()) + return false; + unsigned NumElts = OriginalDemandedElts.getBitWidth(); assert((!Op.getValueType().isVector() || NumElts == Op.getValueType().getVectorNumElements()) && @@ -815,9 +906,6 @@ bool TargetLowering::SimplifyDemandedBits( SDLoc dl(Op); auto &DL = TLO.DAG.getDataLayout(); - // Don't know anything. - Known = KnownBits(BitWidth); - // Undef operand. if (Op.isUndef()) return false; @@ -850,7 +938,7 @@ bool TargetLowering::SimplifyDemandedBits( return false; } - KnownBits Known2, KnownOut; + KnownBits Known2; switch (Op.getOpcode()) { case ISD::TargetConstant: llvm_unreachable("Can't simplify this node"); @@ -864,7 +952,11 @@ bool TargetLowering::SimplifyDemandedBits( APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth); if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1)) return true; - Known = SrcKnown.zextOrTrunc(BitWidth, false); + + // Upper elements are undef, so only get the knownbits if we just demand + // the bottom element. 
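[Editor's aside, not part of the patch] The new INSERT_SUBVECTOR case looks through the insert when none of the inserted lanes are demanded. In lane-mask terms:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Inserting a 2-element subvector at index 2 of an 8-element vector only
    // affects lanes 2 and 3.  If the demanded-elements mask has those lanes
    // clear, the insert can be skipped and the base vector used directly.
    const unsigned NumSubElts = 2, Idx = 2;
    const uint8_t DemandedElts = 0xF3;                        // lanes 0,1,4..7
    const uint8_t SubMask = ((1u << NumSubElts) - 1) << Idx;  // lanes 2,3
    assert((DemandedElts & SubMask) == 0);                    // sub not demanded
    return 0;
  }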
+ if (DemandedElts == 1) + Known = SrcKnown.anyextOrTrunc(BitWidth); break; } case ISD::BUILD_VECTOR: @@ -877,6 +969,12 @@ bool TargetLowering::SimplifyDemandedBits( if (getTargetConstantFromLoad(LD)) { Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); return false; // Don't fall through, will infinitely loop. + } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + // If this is a ZEXTLoad and we are looking at the loaded value. + EVT MemVT = LD->getMemoryVT(); + unsigned MemBits = MemVT.getScalarSizeInBits(); + Known.Zero.setBitsFrom(MemBits); + return false; // Don't fall through, will infinitely loop. } break; } @@ -904,7 +1002,7 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) return true; - Known = KnownScl.zextOrTrunc(BitWidth, false); + Known = KnownScl.anyextOrTrunc(BitWidth); KnownBits KnownVec; if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO, @@ -919,57 +1017,75 @@ bool TargetLowering::SimplifyDemandedBits( return false; } case ISD::INSERT_SUBVECTOR: { - SDValue Base = Op.getOperand(0); + // Demand any elements from the subvector and the remainder from the src its + // inserted into. + SDValue Src = Op.getOperand(0); SDValue Sub = Op.getOperand(1); - EVT SubVT = Sub.getValueType(); - unsigned NumSubElts = SubVT.getVectorNumElements(); - - // If index isn't constant, assume we need the original demanded base - // elements and ALL the inserted subvector elements. - APInt BaseElts = DemandedElts; - APInt SubElts = APInt::getAllOnesValue(NumSubElts); - if (isa<ConstantSDNode>(Op.getOperand(2))) { - const APInt &Idx = Op.getConstantOperandAPInt(2); - if (Idx.ule(NumElts - NumSubElts)) { - unsigned SubIdx = Idx.getZExtValue(); - SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); - BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); - } - } - - KnownBits KnownSub, KnownBase; - if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO, + uint64_t Idx = Op.getConstantOperandVal(2); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + + KnownBits KnownSub, KnownSrc; + if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO, Depth + 1)) return true; - if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO, + if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO, Depth + 1)) return true; Known.Zero.setAllBits(); Known.One.setAllBits(); - if (!!SubElts) { - Known.One &= KnownSub.One; - Known.Zero &= KnownSub.Zero; + if (!!DemandedSubElts) { + Known.One &= KnownSub.One; + Known.Zero &= KnownSub.Zero; } - if (!!BaseElts) { - Known.One &= KnownBase.One; - Known.Zero &= KnownBase.Zero; + if (!!DemandedSrcElts) { + Known.One &= KnownSrc.One; + Known.Zero &= KnownSrc.Zero; + } + + // Attempt to avoid multi-use src if we don't need anything from it. + if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || + !DemandedSrcElts.isAllOnesValue()) { + SDValue NewSub = SimplifyMultipleUseDemandedBits( + Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1); + SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); + if (NewSub || NewSrc) { + NewSub = NewSub ? NewSub : Sub; + NewSrc = NewSrc ? 
NewSrc : Src; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub, + Op.getOperand(2)); + return TLO.CombineTo(Op, NewOp); + } } break; } case ISD::EXTRACT_SUBVECTOR: { - // If index isn't constant, assume we need all the source vector elements. + // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt SrcElts = APInt::getAllOnesValue(NumSrcElts); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. - uint64_t Idx = SubIdx->getZExtValue(); - SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - } - if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1)) + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + + if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO, + Depth + 1)) return true; + + // Attempt to avoid multi-use src if we don't need anything from it. + if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) { + SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); + if (DemandedSrc) { + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, + Op.getOperand(1)); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::CONCAT_VECTORS: { @@ -1069,7 +1185,8 @@ bool TargetLowering::SimplifyDemandedBits( // If any of the set bits in the RHS are known zero on the LHS, shrink // the constant. - if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits, + DemandedElts, TLO)) return true; // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its @@ -1117,16 +1234,14 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts, + TLO)) return true; // If the operation can be done in a smaller type, do so. if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; - // Output known-1 bits are only known if set in both the LHS & RHS. - Known.One &= Known2.One; - // Output known-0 are known to be clear if zero in either the LHS | RHS. - Known.Zero |= Known2.Zero; + Known &= Known2; break; } case ISD::OR: { @@ -1163,16 +1278,13 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.One | Known2.Zero)) return TLO.CombineTo(Op, Op1); // If the RHS is a constant, see if we can simplify it. - if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; // If the operation can be done in a smaller type, do so. if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; - // Output known-0 bits are only known if clear in both the LHS & RHS. - Known.Zero &= Known2.Zero; - // Output known-1 are known to be set if set in either the LHS | RHS. 
- Known.One |= Known2.One; + Known |= Known2; break; } case ISD::XOR: { @@ -1218,12 +1330,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); - // Output known-0 bits are known if clear or set in both the LHS & RHS. - KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); - // Output known-1 are known to be set if set in only one of the LHS, RHS. - KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); - - if (ConstantSDNode *C = isConstOrConstSplat(Op1)) { + ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts); + if (C) { // If one side is a constant, and all of the known set bits on the other // side are also set in the constant, turn this into an AND, as we know // the bits will be cleared. @@ -1238,19 +1346,20 @@ bool TargetLowering::SimplifyDemandedBits( // If the RHS is a constant, see if we can change it. Don't alter a -1 // constant because that's a 'not' op, and that is better for combining // and codegen. - if (!C->isAllOnesValue()) { - if (DemandedBits.isSubsetOf(C->getAPIntValue())) { - // We're flipping all demanded bits. Flip the undemanded bits too. - SDValue New = TLO.DAG.getNOT(dl, Op0, VT); - return TLO.CombineTo(Op, New); - } - // If we can't turn this into a 'not', try to shrink the constant. - if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) - return true; + if (!C->isAllOnesValue() && + DemandedBits.isSubsetOf(C->getAPIntValue())) { + // We're flipping all demanded bits. Flip the undemanded bits too. + SDValue New = TLO.DAG.getNOT(dl, Op0, VT); + return TLO.CombineTo(Op, New); } } - Known = std::move(KnownOut); + // If we can't turn this into a 'not', try to shrink the constant. + if (!C || !C->isAllOnesValue()) + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) + return true; + + Known ^= Known2; break; } case ISD::SELECT: @@ -1264,7 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -1282,7 +1391,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // If the operands are constants, see if we can simplify them. - if (ShrinkDemandedConstant(Op, DemandedBits, TLO)) + if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; // Only known if known in both the LHS and RHS. @@ -1320,12 +1429,10 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::SHL: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + EVT ShiftVT = Op1.getValueType(); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { - // If the shift count is an invalid immediate, don't do anything. - if (SA->getAPIntValue().uge(BitWidth)) - break; - + if (const APInt *SA = + TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); if (ShAmt == 0) return TLO.CombineTo(Op, Op0); @@ -1336,37 +1443,25 @@ bool TargetLowering::SimplifyDemandedBits( // TODO - support non-uniform vector amounts. 
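[Editor's aside, not part of the patch] The manual Known.Zero/Known.One updates for AND, OR and XOR are folded into the KnownBits &=, |= and ^= operators above; the underlying rules are unchanged. For AND, for instance, a result bit is known zero if it is known zero on either side and known one only if known one on both sides (OR and XOR are the analogous duals). With plain masks:

  #include <cassert>
  #include <cstdint>

  int main() {
    const uint8_t ZeroL = 0xF0, OneL = 0x01;  // X: high nibble known 0, bit 0 known 1
    const uint8_t ZeroR = 0x0C, OneR = 0x02;  // Y: bits 2..3 known 0, bit 1 known 1
    const uint8_t AndZero = ZeroL | ZeroR;    // zero in either operand
    const uint8_t AndOne  = OneL & OneR;      // one in both operands
    assert(AndZero == 0xFC && AndOne == 0x00);
    assert((AndZero & AndOne) == 0);          // no bit both known 0 and known 1
    return 0;
  }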
if (Op0.getOpcode() == ISD::SRL) { if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { - if (ConstantSDNode *SA2 = - isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if (SA2->getAPIntValue().ult(BitWidth)) { - unsigned C1 = SA2->getZExtValue(); - unsigned Opc = ISD::SHL; - int Diff = ShAmt - C1; - if (Diff < 0) { - Diff = -Diff; - Opc = ISD::SRL; - } - - SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType()); - return TLO.CombineTo( - Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); + if (const APInt *SA2 = + TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt - C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; } + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); } } } - if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, - Known, TLO, Depth + 1)) - return true; - - // Try shrinking the operation as long as the shift amount will still be - // in range. - if ((ShAmt < DemandedBits.getActiveBits()) && - ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) - return true; - // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits // are not demanded. This will likely allow the anyext to be folded away. + // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::ANY_EXTEND) { SDValue InnerOp = Op0.getOperand(0); EVT InnerVT = InnerOp.getValueType(); @@ -1382,22 +1477,24 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo( Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); } + // Repeat the SHL optimization above in cases where an extension // intervenes: (shl (anyext (shr x, c1)), c2) to // (shl (anyext x), c2-c1). This requires that the bottom c1 bits // aren't demanded (as above) and that the shifted upper c1 bits of // x aren't demanded. + // TODO - support non-uniform vector amounts. if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && InnerOp.hasOneUse()) { - if (ConstantSDNode *SA2 = - isConstOrConstSplat(InnerOp.getOperand(1))) { - unsigned InnerShAmt = SA2->getLimitedValue(InnerBits); + if (const APInt *SA2 = + TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) { + unsigned InnerShAmt = SA2->getZExtValue(); if (InnerShAmt < ShAmt && InnerShAmt < InnerBits && DemandedBits.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) && DemandedBits.countTrailingZeros() >= ShAmt) { - SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, - Op1.getValueType()); + SDValue NewSA = + TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, InnerOp.getOperand(0)); return TLO.CombineTo( @@ -1407,60 +1504,76 @@ bool TargetLowering::SimplifyDemandedBits( } } + APInt InDemandedMask = DemandedBits.lshr(ShAmt); + if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, + Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero <<= ShAmt; Known.One <<= ShAmt; // low bits known zero. Known.Zero.setLowBits(ShAmt); + + // Try shrinking the operation as long as the shift amount will still be + // in range. + if ((ShAmt < DemandedBits.getActiveBits()) && + ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) + return true; + } + + // If we are only demanding sign bits then we can use the shift source + // directly. 
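[Editor's aside, not part of the patch] The shift-merging fold above now goes through getValidShiftAmountConstant instead of re-checking the immediate by hand; the fold itself, (shl (srl X, C1), C2) -> a single shift by |C2 - C1| when the low C2 bits are not demanded, is unchanged. A concrete check, placed here before the sign-bit case that follows:

  #include <cassert>
  #include <cstdint>

  int main() {
    // C1 = 4, C2 = 6: with the low 6 bits not demanded, (X >>u 4) << 6 is
    // equivalent to X << 2 (Diff = C2 - C1 = 2, still a SHL).
    const uint32_t DemandedMask = ~0x3Fu;       // low 6 bits are not demanded
    for (uint32_t X : {0xABu, 0xFFFFFFFFu, 0x12345678u}) {
      uint32_t TwoShifts   = (X >> 4) << 6;
      uint32_t SingleShift = X << 2;
      assert((TwoShifts & DemandedMask) == (SingleShift & DemandedMask));
    }
    return 0;
  }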
+ if (const APInt *MaxSA = + TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) { + unsigned ShAmt = MaxSA->getZExtValue(); + unsigned NumSignBits = + TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); + unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); + if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits)) + return TLO.CombineTo(Op, Op0); } break; } case ISD::SRL: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + EVT ShiftVT = Op1.getValueType(); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { - // If the shift count is an invalid immediate, don't do anything. - if (SA->getAPIntValue().uge(BitWidth)) - break; - + if (const APInt *SA = + TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); if (ShAmt == 0) return TLO.CombineTo(Op, Op0); - EVT ShiftVT = Op1.getValueType(); - APInt InDemandedMask = (DemandedBits << ShAmt); - - // If the shift is exact, then it does demand the low bits (and knows that - // they are zero). - if (Op->getFlags().hasExact()) - InDemandedMask.setLowBits(ShAmt); - // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a // single shift. We can do this if the top bits (which are shifted out) // are never demanded. // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SHL) { - if (ConstantSDNode *SA2 = - isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if (!DemandedBits.intersects( - APInt::getHighBitsSet(BitWidth, ShAmt))) { - if (SA2->getAPIntValue().ult(BitWidth)) { - unsigned C1 = SA2->getZExtValue(); - unsigned Opc = ISD::SRL; - int Diff = ShAmt - C1; - if (Diff < 0) { - Diff = -Diff; - Opc = ISD::SHL; - } - - SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); - return TLO.CombineTo( - Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); + if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) { + if (const APInt *SA2 = + TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { + unsigned C1 = SA2->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt - C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; } + SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT); + return TLO.CombineTo( + Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA)); } } } + APInt InDemandedMask = (DemandedBits << ShAmt); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (Op->getFlags().hasExact()) + InDemandedMask.setLowBits(ShAmt); + // Compute the new bits that are at the top now. if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1)) @@ -1468,14 +1581,22 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero.lshrInPlace(ShAmt); Known.One.lshrInPlace(ShAmt); - - Known.Zero.setHighBits(ShAmt); // High bits known zero. + // High bits known zero. + Known.Zero.setHighBits(ShAmt); } break; } case ISD::SRA: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + EVT ShiftVT = Op1.getValueType(); + + // If we only want bits that already match the signbit then we don't need + // to shift. 
+ unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); + if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >= + NumHiDemandedBits) + return TLO.CombineTo(Op, Op0); // If this is an arithmetic shift right and only the low-bit is set, we can // always convert this into a logical shr, even if the shift amount is @@ -1484,11 +1605,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOneValue()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); - if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { - // If the shift count is an invalid immediate, don't do anything. - if (SA->getAPIntValue().uge(BitWidth)) - break; - + if (const APInt *SA = + TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); if (ShAmt == 0) return TLO.CombineTo(Op, Op0); @@ -1525,14 +1643,23 @@ bool TargetLowering::SimplifyDemandedBits( int Log2 = DemandedBits.exactLogBase2(); if (Log2 >= 0) { // The bit must come from the sign. - SDValue NewSA = - TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, Op1.getValueType()); + SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA)); } if (Known.One[BitWidth - ShAmt - 1]) // New bits are known one. Known.One.setHighBits(ShAmt); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0) { + SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } } break; } @@ -1573,6 +1700,32 @@ bool TargetLowering::SimplifyDemandedBits( Known.One |= Known2.One; Known.Zero |= Known2.Zero; } + + // For pow-2 bitwidths we only demand the bottom modulo amt bits. + if (isPowerOf2_32(BitWidth)) { + APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1); + if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts, + Known2, TLO, Depth + 1)) + return true; + } + break; + } + case ISD::ROTL: + case ISD::ROTR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + // If we're rotating an 0/-1 value, then it stays an 0/-1 value. + if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, Op0); + + // For pow-2 bitwidths we only demand the bottom modulo amt bits. + if (isPowerOf2_32(BitWidth)) { + APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1); + if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + } break; } case ISD::BITREVERSE: { @@ -1602,7 +1755,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. if (DemandedBits.isSignMask()) { - unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0); + unsigned NumSignBits = + TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. @@ -1639,8 +1793,7 @@ bool TargetLowering::SimplifyDemandedBits( // If the input sign bit is known zero, convert this into a zero extension. 
if (Known.Zero[ExVTBits - 1]) - return TLO.CombineTo( - Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT.getScalarType())); + return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT)); APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits); if (Known.One[ExVTBits - 1]) { // Input sign bit known set @@ -1704,7 +1857,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); - Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + Known = Known.zext(BitWidth); break; } case ISD::SIGN_EXTEND: @@ -1777,7 +1930,12 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); - Known = Known.zext(BitWidth, false /* => any extend */); + Known = Known.anyext(BitWidth); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } case ISD::TRUNCATE: { @@ -1886,7 +2044,7 @@ bool TargetLowering::SimplifyDemandedBits( Known = Known2; if (BitWidth > EltBitWidth) - Known = Known.zext(BitWidth, false /* => any extend */); + Known = Known.anyext(BitWidth); break; } case ISD::BITCAST: { @@ -2151,14 +2309,20 @@ bool TargetLowering::SimplifyDemandedVectorElts( APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, bool AssumeSingleUse) const { EVT VT = Op.getValueType(); + unsigned Opcode = Op.getOpcode(); APInt DemandedElts = OriginalDemandedElts; unsigned NumElts = DemandedElts.getBitWidth(); assert(VT.isVector() && "Expected vector op"); - assert(VT.getVectorNumElements() == NumElts && - "Mask size mismatches value type element count!"); KnownUndef = KnownZero = APInt::getNullValue(NumElts); + // TODO: For now we assume we know nothing about scalable vectors. + if (VT.isScalableVector()) + return false; + + assert(VT.getVectorNumElements() == NumElts && + "Mask size mismatches value type element count!"); + // Undef operand. if (Op.isUndef()) { KnownUndef.setAllBits(); @@ -2182,7 +2346,22 @@ bool TargetLowering::SimplifyDemandedVectorElts( SDLoc DL(Op); unsigned EltSizeInBits = VT.getScalarSizeInBits(); - switch (Op.getOpcode()) { + // Helper for demanding the specified elements and all the bits of both binary + // operands. + auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) { + SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts, + TLO.DAG, Depth + 1); + SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts, + TLO.DAG, Depth + 1); + if (NewOp0 || NewOp1) { + SDValue NewOp = TLO.DAG.getNode( + Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? 
NewOp1 : Op1); + return TLO.CombineTo(Op, NewOp); + } + return false; + }; + + switch (Opcode) { case ISD::SCALAR_TO_VECTOR: { if (!DemandedElts[0]) { KnownUndef.setAllBits(); @@ -2234,7 +2413,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( } KnownBits Known; - if (SimplifyDemandedBits(Src, SrcDemandedBits, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known, + TLO, Depth + 1)) return true; } @@ -2323,53 +2503,75 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::INSERT_SUBVECTOR: { - if (!isa<ConstantSDNode>(Op.getOperand(2))) - break; - SDValue Base = Op.getOperand(0); + // Demand any elements from the subvector and the remainder from the src its + // inserted into. + SDValue Src = Op.getOperand(0); SDValue Sub = Op.getOperand(1); - EVT SubVT = Sub.getValueType(); - unsigned NumSubElts = SubVT.getVectorNumElements(); - const APInt &Idx = Op.getConstantOperandAPInt(2); - if (Idx.ugt(NumElts - NumSubElts)) - break; - unsigned SubIdx = Idx.getZExtValue(); - APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + uint64_t Idx = Op.getConstantOperandVal(2); + unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + APInt DemandedSrcElts = DemandedElts; + DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + APInt SubUndef, SubZero; - if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO, + if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO, Depth + 1)) return true; - APInt BaseElts = DemandedElts; - BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); - - // If none of the base operand elements are demanded, replace it with undef. - if (!BaseElts && !Base.isUndef()) - return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, - TLO.DAG.getUNDEF(VT), - Op.getOperand(1), - Op.getOperand(2))); - - if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, - Depth + 1)) + + // If none of the src operand elements are demanded, replace it with undef. + if (!DemandedSrcElts && !Src.isUndef()) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + TLO.DAG.getUNDEF(VT), Sub, + Op.getOperand(2))); + + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero, + TLO, Depth + 1)) return true; - KnownUndef.insertBits(SubUndef, SubIdx); - KnownZero.insertBits(SubZero, SubIdx); + KnownUndef.insertBits(SubUndef, Idx); + KnownZero.insertBits(SubZero, Idx); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedSrcElts.isAllOnesValue() || + !DemandedSubElts.isAllOnesValue()) { + SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( + Src, DemandedSrcElts, TLO.DAG, Depth + 1); + SDValue NewSub = SimplifyMultipleUseDemandedVectorElts( + Sub, DemandedSubElts, TLO.DAG, Depth + 1); + if (NewSrc || NewSub) { + NewSrc = NewSrc ? NewSrc : Src; + NewSub = NewSub ? NewSub : Sub; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc, + NewSub, Op.getOperand(2)); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::EXTRACT_SUBVECTOR: { + // Offset the demanded elts by the subvector index. 
SDValue Src = Op.getOperand(0); - ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (Src.getValueType().isScalableVector()) + break; + uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { - // Offset the demanded elts by the subvector index. - uint64_t Idx = SubIdx->getZExtValue(); - APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, - Depth + 1)) - return true; - KnownUndef = SrcUndef.extractBits(NumElts, Idx); - KnownZero = SrcZero.extractBits(NumElts, Idx); + APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) + return true; + KnownUndef = SrcUndef.extractBits(NumElts, Idx); + KnownZero = SrcZero.extractBits(NumElts, Idx); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedElts.isAllOnesValue()) { + SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( + Src, DemandedSrcElts, TLO.DAG, Depth + 1); + if (NewSrc) { + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc, + Op.getOperand(1)); + return TLO.CombineTo(Op, NewOp); + } } break; } @@ -2538,7 +2740,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } - // TODO: There are more binop opcodes that could be handled here - MUL, MIN, + // TODO: There are more binop opcodes that could be handled here - MIN, // MAX, saturated math, etc. case ISD::OR: case ISD::XOR: @@ -2549,17 +2751,26 @@ bool TargetLowering::SimplifyDemandedVectorElts( case ISD::FMUL: case ISD::FDIV: case ISD::FREM: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; APInt UndefLHS, ZeroLHS; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; KnownZero = ZeroLHS & ZeroRHS; KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS); + + // Attempt to avoid multi-use ops if we don't need anything from them. + // TODO - use KnownUndef to relax the demandedelts? + if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::SHL: @@ -2567,27 +2778,39 @@ bool TargetLowering::SimplifyDemandedVectorElts( case ISD::SRA: case ISD::ROTL: case ISD::ROTR: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; APInt UndefLHS, ZeroLHS; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; KnownZero = ZeroLHS; KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop? + + // Attempt to avoid multi-use ops if we don't need anything from them. 
+ // TODO - use KnownUndef to relax the demandedelts? + if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::MUL: case ISD::AND: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + APInt SrcUndef, SrcZero; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, - SrcZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, - KnownZero, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero, + TLO, Depth + 1)) return true; // If either side has a zero element, then the result element is zero, even @@ -2597,6 +2820,12 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero |= SrcZero; KnownUndef &= SrcUndef; KnownUndef &= ~KnownZero; + + // Attempt to avoid multi-use ops if we don't need anything from them. + // TODO - use KnownUndef to relax the demandedelts? + if (!DemandedElts.isAllOnesValue()) + if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) + return true; break; } case ISD::TRUNCATE: @@ -2661,17 +2890,16 @@ void TargetLowering::computeKnownBitsForTargetInstr( Known.resetAll(); } -void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, - KnownBits &Known, - const APInt &DemandedElts, - const SelectionDAG &DAG, - unsigned Depth) const { - assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex"); +void TargetLowering::computeKnownBitsForFrameIndex( + const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const { + // The low bits are known zero if the pointer is aligned. + Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx))); +} - if (unsigned Align = DAG.InferPtrAlignment(Op)) { - // The low bits are known zero if the pointer is aligned. - Known.Zero.setLowBits(Log2_32(Align)); - } +Align TargetLowering::computeKnownAlignForTargetInstr( + GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI, + unsigned Depth) const { + return Align(1); } /// This method can be implemented by targets that want to expose additional @@ -2689,6 +2917,12 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } +unsigned TargetLowering::computeNumSignBitsForTargetInstr( + GISelKnownBits &Analysis, Register R, const APInt &DemandedElts, + const MachineRegisterInfo &MRI, unsigned Depth) const { + return 1; +} + bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth) const { @@ -3788,33 +4022,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // the comparison operands is infinity or negative infinity, convert the // condition to a less-awkward <= or >=. 
if (CFP->getValueAPF().isInfinity()) { - if (CFP->getValueAPF().isNegative()) { - if (Cond == ISD::SETOEQ && - isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); - if (Cond == ISD::SETUEQ && - isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); - if (Cond == ISD::SETUNE && - isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); - if (Cond == ISD::SETONE && - isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); - } else { - if (Cond == ISD::SETOEQ && - isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); - if (Cond == ISD::SETUEQ && - isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); - if (Cond == ISD::SETUNE && - isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); - if (Cond == ISD::SETONE && - isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); + bool IsNegInf = CFP->getValueAPF().isNegative(); + ISD::CondCode NewCond = ISD::SETCC_INVALID; + switch (Cond) { + case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break; + case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break; + case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break; + case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break; + default: break; } + if (NewCond != ISD::SETCC_INVALID && + isCondCodeLegal(NewCond, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); } } } @@ -4245,10 +4464,10 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, - ImmutableCallSite CS) const { + const CallBase &Call) const { /// Information about all of the constraints. AsmOperandInfoVector ConstraintOperands; - const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand()); unsigned maCount = 0; // Largest number of multiple alternative constraints. // Do a prepass over the constraints, canonicalizing them, and building up the @@ -4271,25 +4490,24 @@ TargetLowering::ParseConstraints(const DataLayout &DL, case InlineAsm::isOutput: // Indirect outputs just consume an argument. if (OpInfo.isIndirect) { - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); break; } // The return value of the call is this value. As such, there is no // corresponding argument. 
- assert(!CS.getType()->isVoidTy() && - "Bad inline asm!"); - if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + assert(!Call.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(Call.getType())) { OpInfo.ConstraintVT = getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType()); + OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType()); } ++ResNo; break; case InlineAsm::isInput: - OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++); break; case InlineAsm::isClobber: // Nothing to do. @@ -5479,251 +5697,221 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } -char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, bool ForCodeSize, - unsigned Depth) const { +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOps, bool OptForSize, + NegatibleCost &Cost, + unsigned Depth) const { // fneg is removable even if it has multiple uses. - if (Op.getOpcode() == ISD::FNEG) - return 2; + if (Op.getOpcode() == ISD::FNEG) { + Cost = NegatibleCost::Cheaper; + return Op.getOperand(0); + } - // Don't allow anything with multiple uses unless we know it is free. - EVT VT = Op.getValueType(); + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return SDValue(); + + // Pre-increment recursion depth for use in recursive calls. + ++Depth; const SDNodeFlags Flags = Op->getFlags(); const TargetOptions &Options = DAG.getTarget().Options; - if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && - isFPExtFree(VT, Op.getOperand(0).getValueType()))) - return 0; + EVT VT = Op.getValueType(); + unsigned Opcode = Op.getOpcode(); - // Don't recurse exponentially. - if (Depth > SelectionDAG::MaxRecursionDepth) - return 0; + // Don't allow anything with multiple uses unless we know it is free. + if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) { + bool IsFreeExtend = Opcode == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()); + if (!IsFreeExtend) + return SDValue(); + } - switch (Op.getOpcode()) { - case ISD::ConstantFP: { - if (!LegalOperations) - return 1; + SDLoc DL(Op); + switch (Opcode) { + case ISD::ConstantFP: { // Don't invert constant FP values after legalization unless the target says // the negated constant is legal. - return isOperationLegal(ISD::ConstantFP, VT) || - isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, - ForCodeSize); + bool IsOpLegal = + isOperationLegal(ISD::ConstantFP, VT) || + isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, + OptForSize); + + if (LegalOps && !IsOpLegal) + break; + + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + SDValue CFP = DAG.getConstantFP(V, DL, VT); + + // If we already have the use of the negated floating constant, it is free + // to negate it even it has multiple uses. + if (!Op.hasOneUse() && CFP.use_empty()) + break; + Cost = NegatibleCost::Neutral; + return CFP; } case ISD::BUILD_VECTOR: { // Only permit BUILD_VECTOR of constants. 
if (llvm::any_of(Op->op_values(), [&](SDValue N) { return !N.isUndef() && !isa<ConstantFPSDNode>(N); })) - return 0; - if (!LegalOperations) - return 1; - if (isOperationLegal(ISD::ConstantFP, VT) && - isOperationLegal(ISD::BUILD_VECTOR, VT)) - return 1; - return llvm::all_of(Op->op_values(), [&](SDValue N) { - return N.isUndef() || - isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, - ForCodeSize); - }); + break; + + bool IsOpLegal = + (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) || + llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, + OptForSize); + }); + + if (LegalOps && !IsOpLegal) + break; + + SmallVector<SDValue, 4> Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType())); + } + Cost = NegatibleCost::Neutral; + return DAG.getBuildVector(VT, DL, Ops); } - case ISD::FADD: + case ISD::FADD: { if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; + break; // After operation legalization, it might not be legal to create new FSUBs. - if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) - return 0; + if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT)) + break; + SDValue X = Op.getOperand(0), Y = Op.getOperand(1); - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1)) - return V; - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - case ISD::FSUB: + // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y) + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X) + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = CostX; + return DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags); + } + + // Negate the Y if it is not expensive. + if (NegY) { + Cost = CostY; + return DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags); + } + break; + } + case ISD::FSUB: { // We can't turn -(A-B) into B-A when we honor signed zeros. 
if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; + break; - // fold (fneg (fsub A, B)) -> (fsub B, A) - return 1; + SDValue X = Op.getOperand(0), Y = Op.getOperand(1); + // fold (fneg (fsub 0, Y)) -> Y + if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true)) + if (C->isZero()) { + Cost = NegatibleCost::Cheaper; + return Y; + } + // fold (fneg (fsub X, Y)) -> (fsub Y, X) + Cost = NegatibleCost::Neutral; + return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags); + } case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) - if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1)) - return V; + case ISD::FDIV: { + SDValue X = Op.getOperand(0), Y = Op.getOperand(1); + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); + + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = CostX; + return DAG.getNode(Opcode, DL, VT, NegX, Y, Flags); + } // Ignore X * 2.0 because that is expected to be canonicalized to X + X. if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) - return 0; - - return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); + break; + // Negate the Y if it is not expensive. + if (NegY) { + Cost = CostY; + return DAG.getNode(Opcode, DL, VT, X, NegY, Flags); + } + break; + } case ISD::FMA: case ISD::FMAD: { if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) - return 0; + break; + + SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2); + NegatibleCost CostZ = NegatibleCost::Expensive; + SDValue NegZ = + getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth); + // Give up if fail to negate the Z. + if (!NegZ) + break; // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + NegatibleCost CostX = NegatibleCost::Expensive; + SDValue NegX = + getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth); // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) - char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, - ForCodeSize, Depth + 1); - if (!V2) - return 0; - - // One of Op0/Op1 must be cheaply negatible, then select the cheapest. - char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - char V01 = std::max(V0, V1); - return V01 ? std::max(V01, V2) : 0; - } - - case ISD::FP_EXTEND: - case ISD::FP_ROUND: - case ISD::FSIN: - return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - } - - return 0; -} - -SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, - bool LegalOperations, - bool ForCodeSize, - unsigned Depth) const { - // fneg is removable even if it has multiple uses. 
- if (Op.getOpcode() == ISD::FNEG) - return Op.getOperand(0); + NegatibleCost CostY = NegatibleCost::Expensive; + SDValue NegY = + getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth); - assert(Depth <= SelectionDAG::MaxRecursionDepth && - "getNegatedExpression doesn't match isNegatibleForFree"); - const SDNodeFlags Flags = Op->getFlags(); - - switch (Op.getOpcode()) { - case ISD::ConstantFP: { - APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); - V.changeSign(); - return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); - } - case ISD::BUILD_VECTOR: { - SmallVector<SDValue, 4> Ops; - for (SDValue C : Op->op_values()) { - if (C.isUndef()) { - Ops.push_back(C); - continue; - } - APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); - V.changeSign(); - Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + // Negate the X if its cost is less or equal than Y. + if (NegX && (CostX <= CostY)) { + Cost = std::min(CostX, CostZ); + return DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags); } - return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); - } - case ISD::FADD: - assert((DAG.getTarget().Options.NoSignedZerosFPMath || - Flags.hasNoSignedZeros()) && - "Expected NSZ fp-flag"); - - // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) - if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth + 1)) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(1), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(0), Flags); - case ISD::FSUB: - // fold (fneg (fsub 0, B)) -> B - if (ConstantFPSDNode *N0CFP = - isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) - if (N0CFP->isZero()) - return Op.getOperand(1); - - // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), - Op.getOperand(1), Op.getOperand(0), Flags); - - case ISD::FMUL: - case ISD::FDIV: - // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) - if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, - Depth + 1)) - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1), Flags); - - // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode( - Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), - getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1), - Flags); - case ISD::FMA: - case ISD::FMAD: { - assert((DAG.getTarget().Options.NoSignedZerosFPMath || - Flags.hasNoSignedZeros()) && - "Expected NSZ fp-flag"); - - SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations, - ForCodeSize, Depth + 1); - - char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, - ForCodeSize, Depth + 1); - char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - // TODO: This is a hack. It is possible that costs have changed between now - // and the initial calls to isNegatibleForFree(). That is because we - // are rewriting the expression, and that may change the number of - // uses (and therefore the cost) of values. 
If the negation costs are - // equal, only negate this value if it is a constant. Otherwise, try - // operand 1. A better fix would eliminate uses as a cost factor or - // track the change in uses as we rewrite the expression. - if (V0 > V1 || (V0 == V1 && isa<ConstantFPSDNode>(Op.getOperand(0)))) { - // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) - SDValue Neg0 = getNegatedExpression( - Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, - Op.getOperand(1), Neg2, Flags); + // Negate the Y if it is not expensive. + if (NegY) { + Cost = std::min(CostY, CostZ); + return DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags); } - - // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) - SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, - ForCodeSize, Depth + 1); - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - Op.getOperand(0), Neg1, Neg2, Flags); + break; } case ISD::FP_EXTEND: case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1)); + if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps, + OptForSize, Cost, Depth)) + return DAG.getNode(Opcode, DL, VT, NegV); + break; case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), - getNegatedExpression(Op.getOperand(0), DAG, - LegalOperations, ForCodeSize, - Depth + 1), - Op.getOperand(1)); + if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps, + OptForSize, Cost, Depth)) + return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1)); + break; } - llvm_unreachable("Unknown code"); + return SDValue(); } //===----------------------------------------------------------------------===// @@ -5929,6 +6117,14 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, return Ok; } +// Check that (every element of) Z is undef or not an exact multiple of BW. 
+static bool isNonZeroModBitWidth(SDValue Z, unsigned BW) { + return ISD::matchUnaryPredicate( + Z, + [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; }, + true); +} + bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); @@ -5939,41 +6135,54 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) return false; - // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) - // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) SDValue X = Node->getOperand(0); SDValue Y = Node->getOperand(1); SDValue Z = Node->getOperand(2); - unsigned EltSizeInBits = VT.getScalarSizeInBits(); + unsigned BW = VT.getScalarSizeInBits(); bool IsFSHL = Node->getOpcode() == ISD::FSHL; SDLoc DL(SDValue(Node, 0)); EVT ShVT = Z.getValueType(); - SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); - SDValue Zero = DAG.getConstant(0, DL, ShVT); - SDValue ShAmt; - if (isPowerOf2_32(EltSizeInBits)) { - SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); - ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask); - } else { + SDValue ShX, ShY; + SDValue ShAmt, InvShAmt; + if (isNonZeroModBitWidth(Z, BW)) { + // fshl: X << C | Y >> (BW - C) + // fshr: X << (BW - C) | Y >> C + // where C = Z % BW is not zero + SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT); ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC); - } - - SDValue InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt); - SDValue ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt); - SDValue ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt); - SDValue Or = DAG.getNode(ISD::OR, DL, VT, ShX, ShY); - - // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth, - // and that is undefined. We must compare and select to avoid UB. - EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShVT); + InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt); + ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt); + ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt); + } else { + // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) + // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) + SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT); + if (isPowerOf2_32(BW)) { + // Z % BW -> Z & (BW - 1) + ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask); + // (BW - 1) - (Z % BW) -> ~Z & (BW - 1) + InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask); + } else { + SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT); + ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC); + InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt); + } - // For fshl, 0-shift returns the 1st arg (X). - // For fshr, 0-shift returns the 2nd arg (Y). - SDValue IsZeroShift = DAG.getSetCC(DL, CCVT, ShAmt, Zero, ISD::SETEQ); - Result = DAG.getSelect(DL, VT, IsZeroShift, IsFSHL ? 
X : Y, Or); + SDValue One = DAG.getConstant(1, DL, ShVT); + if (IsFSHL) { + ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt); + SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One); + ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt); + } else { + SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One); + ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt); + ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt); + } + } + Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY); return true; } @@ -5988,12 +6197,15 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, SDLoc DL(SDValue(Node, 0)); EVT ShVT = Op1.getValueType(); - SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + SDValue Zero = DAG.getConstant(0, DL, ShVT); - // If a rotate in the other direction is legal, use it. + assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 && + "Expecting the type bitwidth to be a power of 2"); + + // If a rotate in the other direction is supported, use it. unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; - if (isOperationLegal(RevRot, VT)) { - SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1); + if (isOperationLegalOrCustom(RevRot, VT)) { + SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); Result = DAG.getNode(RevRot, DL, VT, Op0, Sub); return true; } @@ -6006,15 +6218,13 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, return false; // Otherwise, - // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1))) - // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1))) + // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1))) + // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1))) // - assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 && - "Expecting the type bitwidth to be a power of 2"); unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); - SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, Op1); + SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC); SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC); Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0), @@ -6198,114 +6408,50 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); - if (SrcVT.getScalarType() != MVT::i64) + if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64) + return false; + + // Only expand vector types if we have the appropriate vector bit operations. + if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || + !isOperationLegalOrCustom(ISD::FADD, DstVT) || + !isOperationLegalOrCustom(ISD::FSUB, DstVT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) return false; SDLoc dl(SDValue(Node, 0)); EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); - if (DstVT.getScalarType() == MVT::f32) { - // Only expand vector types if we have the appropriate vector bit - // operations. 
- if (SrcVT.isVector() && - (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || - !isOperationLegalOrCustom(ISD::FADD, DstVT) || - !isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT) || - !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || - !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) - return false; - - // For unsigned conversions, convert them to signed conversions using the - // algorithm from the x86_64 __floatundisf in compiler_rt. - - // TODO: This really should be implemented using a branch rather than a - // select. We happen to get lucky and machinesink does the right - // thing most of the time. This would be a good candidate for a - // pseudo-op, or, even better, for whole-function isel. - EVT SetCCVT = - getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); - - SDValue SignBitTest = DAG.getSetCC( - dl, SetCCVT, Src, DAG.getConstant(0, dl, SrcVT), ISD::SETLT); - - SDValue ShiftConst = DAG.getConstant(1, dl, ShiftVT); - SDValue Shr = DAG.getNode(ISD::SRL, dl, SrcVT, Src, ShiftConst); - SDValue AndConst = DAG.getConstant(1, dl, SrcVT); - SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst); - SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr); - - SDValue Slow, Fast; - if (Node->isStrictFPOpcode()) { - // In strict mode, we must avoid spurious exceptions, and therefore - // must make sure to only emit a single STRICT_SINT_TO_FP. - SDValue InCvt = DAG.getSelect(dl, SrcVT, SignBitTest, Or, Src); - Fast = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, { DstVT, MVT::Other }, - { Node->getOperand(0), InCvt }); - Slow = DAG.getNode(ISD::STRICT_FADD, dl, { DstVT, MVT::Other }, - { Fast.getValue(1), Fast, Fast }); - Chain = Slow.getValue(1); - // The STRICT_SINT_TO_FP inherits the exception mode from the - // incoming STRICT_UINT_TO_FP node; the STRICT_FADD node can - // never raise any exception. - SDNodeFlags Flags; - Flags.setNoFPExcept(Node->getFlags().hasNoFPExcept()); - Fast->setFlags(Flags); - Flags.setNoFPExcept(true); - Slow->setFlags(Flags); - } else { - SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or); - Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt); - Fast = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); - } - - Result = DAG.getSelect(dl, DstVT, SignBitTest, Slow, Fast); - return true; - } - - if (DstVT.getScalarType() == MVT::f64) { - // Only expand vector types if we have the appropriate vector bit - // operations. - if (SrcVT.isVector() && - (!isOperationLegalOrCustom(ISD::SRL, SrcVT) || - !isOperationLegalOrCustom(ISD::FADD, DstVT) || - !isOperationLegalOrCustom(ISD::FSUB, DstVT) || - !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) || - !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT))) - return false; - - // Implementation of unsigned i64 to f64 following the algorithm in - // __floatundidf in compiler_rt. This implementation has the advantage - // of performing rounding correctly, both in the default rounding mode - // and in all alternate rounding modes. 
- SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); - SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( - BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); - SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); - SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); - SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); - - SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); - SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); - SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); - SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); - SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); - SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); - if (Node->isStrictFPOpcode()) { - SDValue HiSub = - DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, - {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); - Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, - {HiSub.getValue(1), LoFlt, HiSub}); - Chain = Result.getValue(1); - } else { - SDValue HiSub = - DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); - Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); - } - return true; + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. + SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); + SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( + BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); + SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT); + SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT); + SDValue HiShift = DAG.getConstant(32, dl, ShiftVT); + + SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask); + SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift); + SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); + SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); + SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); + if (Node->isStrictFPOpcode()) { + SDValue HiSub = + DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, + {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); + Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, + {HiSub.getValue(1), LoFlt, HiSub}); + Chain = Result.getValue(1); + } else { + SDValue HiSub = + DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); } - - return false; + return true; } SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, @@ -6564,12 +6710,61 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); EVT SrcVT = LD->getMemoryVT(); + EVT DstVT = LD->getValueType(0); ISD::LoadExtType ExtType = LD->getExtensionType(); unsigned NumElem = SrcVT.getVectorNumElements(); EVT SrcEltVT = SrcVT.getScalarType(); - EVT DstEltVT = LD->getValueType(0).getScalarType(); + EVT DstEltVT = DstVT.getScalarType(); + + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depend on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. 
A vector that does not have + // elements that are byte-sized must therefore be stored as an integer + // built out of the extracted vector elements. + if (!SrcEltVT.isByteSized()) { + unsigned NumLoadBits = SrcVT.getStoreSizeInBits(); + EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits); + + unsigned NumSrcBits = SrcVT.getSizeInBits(); + EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits); + + unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant( + APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT); + + // Load the whole vector and avoid masking off the top bits as it makes + // the codegen worse. + SDValue Load = + DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR, + LD->getPointerInfo(), SrcIntVT, LD->getAlignment(), + LD->getMemOperand()->getFlags(), LD->getAAInfo()); + + SmallVector<SDValue, 8> Vals; + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + unsigned ShiftIntoIdx = + (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx); + SDValue ShiftAmount = + DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), + LoadVT, SL, /*LegalTypes=*/false); + SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount); + SDValue Elt = + DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask); + SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt); + + if (ExtType != ISD::NON_EXTLOAD) { + unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType); + Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar); + } + + Vals.push_back(Scalar); + } + + SDValue Value = DAG.getBuildVector(DstVT, SL, Vals); + return std::make_pair(Value, Load.getValue(1)); + } unsigned Stride = SrcEltVT.getSizeInBits() / 8; assert(SrcEltVT.isByteSized()); @@ -6591,7 +6786,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, } SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains); - SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals); + SDValue Value = DAG.getBuildVector(DstVT, SL, Vals); return std::make_pair(Value, NewChain); } @@ -6612,7 +6807,6 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // The type of data as saved in memory. EVT MemSclVT = StVT.getScalarType(); - EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); unsigned NumElem = StVT.getVectorNumElements(); // A vector must always be stored in memory as-is, i.e. 
without any padding @@ -6629,7 +6823,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, for (unsigned Idx = 0; Idx < NumElem; ++Idx) { SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, - DAG.getConstant(Idx, SL, IdxVT)); + DAG.getVectorIdxConstant(Idx, SL)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt); SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc); unsigned ShiftIntoIdx = @@ -6654,7 +6848,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SmallVector<SDValue, 8> Stores; for (unsigned Idx = 0; Idx < NumElem; ++Idx) { SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, - DAG.getConstant(Idx, SL, IdxVT)); + DAG.getVectorIdxConstant(Idx, SL)); SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride); @@ -7313,12 +7507,13 @@ SDValue TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const { - assert((Opcode == ISD::SDIVFIX || - Opcode == ISD::UDIVFIX) && + assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT || + Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) && "Expected a fixed point division opcode"); EVT VT = LHS.getValueType(); - bool Signed = Opcode == ISD::SDIVFIX; + bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT; + bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT; EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); // If there is enough room in the type to upscale the LHS or downscale the @@ -7330,7 +7525,15 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, : DAG.computeKnownBits(LHS).countMinLeadingZeros(); unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros(); - if (LHSLead + RHSTrail < Scale) + // For signed saturating operations, we need to be able to detect true integer + // division overflow; that is, when you have MIN / -EPS. However, this + // is undefined behavior and if we emit divisions that could take such + // values it may cause undesired behavior (arithmetic exceptions on x86, for + // example). + // Avoid this by requiring an extra bit so that we never get this case. + // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale + // signed saturating division, we need to emit a whopping 32-bit division. + if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed)) return SDValue(); unsigned LHSShift = std::min(LHSLead, Scale); @@ -7384,8 +7587,6 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, Quot = DAG.getNode(ISD::UDIV, dl, VT, LHS, RHS); - // TODO: Saturation. - return Quot; } @@ -7659,3 +7860,26 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const { Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res); return Res; } + +bool TargetLowering::expandREM(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getValueType(0); + SDLoc dl(Node); + bool isSigned = Node->getOpcode() == ISD::SREM; + unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; + unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; + SDValue Dividend = Node->getOperand(0); + SDValue Divisor = Node->getOperand(1); + if (isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1); + return true; + } else if (isOperationLegalOrCustom(DivOpc, VT)) { + // X % Y -> X-X/Y*Y + SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor); + SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor); + Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul); + return true; + } + return false; +}
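For reference, the remainder expansion added in expandREM above relies on the algebraic identity X % Y == X - (X / Y) * Y when only a divide is legal. A minimal standalone C++ sketch of that identity follows; it is illustrative only, the helper name is made up, and plain integer arithmetic stands in for the SDIV/MUL/SUB node sequence the patch builds.

    #include <cassert>
    #include <cstdint>

    // Fallback remainder when only a divide is available: X - (X / Y) * Y.
    // Assumes Y != 0 and that X / Y does not overflow (e.g. INT64_MIN / -1),
    // matching the preconditions of the SREM/UREM node being expanded.
    static int64_t remainderViaDivide(int64_t X, int64_t Y) {
      return X - (X / Y) * Y;
    }

    int main() {
      assert(remainderViaDivide(23, 5) == 23 % 5);   // 3
      assert(remainderViaDivide(-23, 5) == -23 % 5); // -3, truncating division
      return 0;
    }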