author     Dimitry Andric <dim@FreeBSD.org>  2019-10-23 17:51:42 +0000
committer  Dimitry Andric <dim@FreeBSD.org>  2019-10-23 17:51:42 +0000
commit     1d5ae1026e831016fc29fd927877c86af904481f (patch)
tree       2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/CodeGen/SelectionDAG
parent     e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff)
Diffstat (limited to 'lib/CodeGen/SelectionDAG')
25 files changed, 3619 insertions, 2119 deletions
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49c922f560fa..e8950b58d42d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,7 +24,6 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -111,10 +110,20 @@ static cl::opt<bool>
 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                   cl::desc("DAG combiner may split indexing from loads"));
 
+static cl::opt<bool>
+    EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
+                       cl::desc("DAG combiner enable merging multiple stores "
+                                "into a wider store"));
+
 static cl::opt<unsigned> TokenFactorInlineLimit(
     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
     cl::desc("Limit the number of operands to inline for Token Factors"));
 
+static cl::opt<unsigned> StoreMergeDependenceLimit(
+    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
+    cl::desc("Limit the number of times for the same StoreNode and RootNode "
+             "to bail out in store merging dependence check"));
+
 namespace {
 
   class DAGCombiner {
@@ -152,6 +161,14 @@ namespace {
     /// which have not yet been combined to the worklist.
     SmallPtrSet<SDNode *, 32> CombinedNodes;
 
+    /// Map from candidate StoreNode to the pair of RootNode and count.
+    /// The count is used to track how many times we have seen the StoreNode
+    /// with the same RootNode bail out in dependence check. If we have seen
+    /// the bail out for the same pair many times over a limit, we won't
+    /// consider the StoreNode with the same RootNode as store merging
+    /// candidate again.
+    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
+
     // AA - Used for DAG load/store alias analysis.
     AliasAnalysis *AA;
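A note on the new StoreRootCountMap: the bookkeeping amounts to a per-store bail-out counter keyed by the current merge root. A minimal standalone sketch of that pattern (illustrative only, with hypothetical stand-in types; the real code keys a DenseMap by SDNode* and reads the limit from the cl::opt above):

#include <cassert>
#include <map>
#include <utility>

using Node = int; // hypothetical stand-in for SDNode*

std::map<Node, std::pair<Node, unsigned>> StoreRootCountMap;
const unsigned StoreMergeDependenceLimit = 10;

// Returns false once the same (Store, Root) pair has bailed out too often.
bool shouldAttemptMerge(Node Store, Node Root) {
  auto It = StoreRootCountMap.find(Store);
  if (It != StoreRootCountMap.end() && It->second.first == Root &&
      It->second.second >= StoreMergeDependenceLimit)
    return false; // This pair keeps failing the dependence check; give up.
  return true;
}

// Called when the dependence check fails for (Store, Root).
void recordBailOut(Node Store, Node Root) {
  auto &Entry = StoreRootCountMap[Store];
  if (Entry.first != Root)
    Entry = {Root, 1}; // New root: reset the counter.
  else
    ++Entry.second;
}

int main() {
  for (int I = 0; I < 10; ++I)
    recordBailOut(1, 2);
  assert(!shouldAttemptMerge(1, 2)); // hit the limit for this pair
  assert(shouldAttemptMerge(1, 3));  // a different root is still allowed
}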
 
@@ -236,6 +253,7 @@ namespace {
     void removeFromWorklist(SDNode *N) {
       CombinedNodes.erase(N);
       PruningList.remove(N);
+      StoreRootCountMap.erase(N);
 
       auto It = WorklistMap.find(N);
       if (It == WorklistMap.end())
@@ -361,6 +379,7 @@ namespace {
     SDValue visitSUBE(SDNode *N);
     SDValue visitSUBCARRY(SDNode *N);
     SDValue visitMUL(SDNode *N);
+    SDValue visitMULFIX(SDNode *N);
     SDValue useDivRem(SDNode *N);
     SDValue visitSDIV(SDNode *N);
     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
@@ -421,7 +440,6 @@ namespace {
     SDValue visitFP_TO_SINT(SDNode *N);
     SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
-    SDValue visitFP_ROUND_INREG(SDNode *N);
     SDValue visitFP_EXTEND(SDNode *N);
     SDValue visitFNEG(SDNode *N);
     SDValue visitFABS(SDNode *N);
@@ -470,7 +488,7 @@ namespace {
     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                            SDValue N1, SDNodeFlags Flags);
 
-    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+    SDValue visitShiftByConstant(SDNode *N);
 
     SDValue foldSelectOfConstants(SDNode *N);
     SDValue foldVSelectOfConstants(SDNode *N);
@@ -497,6 +515,7 @@ namespace {
     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
     bool isOneUseSetCC(SDValue N) const;
+    bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
 
     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                        unsigned HiOp);
@@ -510,7 +529,7 @@ namespace {
     SDValue BuildSDIVPow2(SDNode *N);
     SDValue BuildUDIV(SDNode *N);
     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
-    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
+    SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
@@ -521,11 +540,11 @@ namespace {
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
-    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+    SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                               SDValue InnerPos, SDValue InnerNeg,
                               unsigned PosOpcode, unsigned NegOpcode,
                               const SDLoc &DL);
-    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+    SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
     SDValue MatchLoadCombine(SDNode *N);
     SDValue MatchStoreCombine(StoreSDNode *N);
     SDValue ReduceLoadWidth(SDNode *N);
@@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 }
 
+bool TargetLowering::DAGCombinerInfo::
+recursivelyDeleteUnusedNodes(SDNode *N) {
+  return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
+}
+
 void TargetLowering::DAGCombinerInfo::
 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
@@ -766,195 +790,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
   DAG.DeleteNode(N);
 }
 
-/// Return 1 if we can compute the negated form of the specified expression for
-/// the same cost as the expression itself, or 2 if we can compute the negated
-/// form more cheaply than the expression itself.
-static char isNegatibleForFree(SDValue Op, bool LegalOperations,
-                               const TargetLowering &TLI,
-                               const TargetOptions *Options,
-                               bool ForCodeSize,
-                               unsigned Depth = 0) {
-  // fneg is removable even if it has multiple uses.
-  if (Op.getOpcode() == ISD::FNEG)
-    return 2;
-
-  // Don't allow anything with multiple uses unless we know it is free.
-  EVT VT = Op.getValueType();
-  const SDNodeFlags Flags = Op->getFlags();
-  if (!Op.hasOneUse() &&
-      !(Op.getOpcode() == ISD::FP_EXTEND &&
-        TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
-    return 0;
-
-  // Don't recurse exponentially.
-  if (Depth > 6)
-    return 0;
-
-  switch (Op.getOpcode()) {
-  default: return false;
-  case ISD::ConstantFP: {
-    if (!LegalOperations)
-      return 1;
-
-    // Don't invert constant FP values after legalization unless the target
-    // says the negated constant is legal.
-    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
-           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
-                            ForCodeSize);
-  }
-  case ISD::BUILD_VECTOR: {
-    // Only permit BUILD_VECTOR of constants.
-    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
-          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
-        }))
-      return 0;
-    if (!LegalOperations)
-      return 1;
-    if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
-        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
-      return 1;
-    return llvm::all_of(Op->op_values(), [&](SDValue N) {
-      return N.isUndef() ||
-             TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()),
-                              VT, ForCodeSize);
-    });
-  }
-  case ISD::FADD:
-    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
-      return 0;
-
-    // After operation legalization, it might not be legal to create new FSUBs.
-    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
-      return 0;
-
-    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
-    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                    Options, ForCodeSize, Depth + 1))
-      return V;
-    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
-    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-  case ISD::FSUB:
-    // We can't turn -(A-B) into B-A when we honor signed zeros.
-    if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
-      return 0;
-
-    // fold (fneg (fsub A, B)) -> (fsub B, A)
-    return 1;
-
-  case ISD::FMUL:
-  case ISD::FDIV:
-    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
-    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
-                                    Options, ForCodeSize, Depth + 1))
-      return V;
-
-    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-
-  case ISD::FP_EXTEND:
-  case ISD::FP_ROUND:
-  case ISD::FSIN:
-    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
-                              ForCodeSize, Depth + 1);
-  }
-}
-
-/// If isNegatibleForFree returns true, return the newly negated expression.
-static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
-                                    bool LegalOperations, bool ForCodeSize,
-                                    unsigned Depth = 0) {
-  // fneg is removable even if it has multiple uses.
-  if (Op.getOpcode() == ISD::FNEG)
-    return Op.getOperand(0);
-
-  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
-  const TargetOptions &Options = DAG.getTarget().Options;
-  const SDNodeFlags Flags = Op->getFlags();
-
-  switch (Op.getOpcode()) {
-  default: llvm_unreachable("Unknown code");
-  case ISD::ConstantFP: {
-    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
-    V.changeSign();
-    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
-  }
-  case ISD::BUILD_VECTOR: {
-    SmallVector<SDValue, 4> Ops;
-    for (SDValue C : Op->op_values()) {
-      if (C.isUndef()) {
-        Ops.push_back(C);
-        continue;
-      }
-      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
-      V.changeSign();
-      Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
-    }
-    return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
-  }
-  case ISD::FADD:
-    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
-
-    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
-    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
-                           Depth + 1))
-      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                         GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, ForCodeSize,
-                                              Depth + 1),
-                         Op.getOperand(1), Flags);
-    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
-    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1),
-                       Op.getOperand(0), Flags);
-  case ISD::FSUB:
-    // fold (fneg (fsub 0, B)) -> B
-    if (ConstantFPSDNode *N0CFP =
-            isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
-      if (N0CFP->isZero())
-        return Op.getOperand(1);
-
-    // fold (fneg (fsub A, B)) -> (fsub B, A)
-    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(1), Op.getOperand(0), Flags);
-
-  case ISD::FMUL:
-  case ISD::FDIV:
-    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
-    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
-                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
-                           Depth + 1))
-      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                         GetNegatedExpression(Op.getOperand(0), DAG,
-                                              LegalOperations, ForCodeSize,
-                                              Depth + 1),
-                         Op.getOperand(1), Flags);
-
-    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
-    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                       Op.getOperand(0),
-                       GetNegatedExpression(Op.getOperand(1), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1), Flags);
-
-  case ISD::FP_EXTEND:
-  case ISD::FSIN:
-    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(0), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1));
-  case ISD::FP_ROUND:
-    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
-                       GetNegatedExpression(Op.getOperand(0), DAG,
-                                            LegalOperations, ForCodeSize,
-                                            Depth + 1),
-                       Op.getOperand(1));
-  }
-}
-
 // APInts must be the same size for most operations, this helper
 // function zero extends the shorter of the pair so that they match.
 // We provide an Offset so that we can create bitwidths that won't overflow.
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
       if (!OpNode.getNode())
         return SDValue();
-      AddToWorklist(OpNode.getNode());
       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
     }
   }
@@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
 
     SDValue RV =
         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
-    AddToWorklist(N0.getNode());
     if (Replace)
       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
 
@@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
 
       for (SDNode *LN : UpdatedNodes) {
-        AddToWorklist(LN);
         AddUsersToWorklist(LN);
+        AddToWorklist(LN);
       }
       if (!NIsValid)
        continue;
@@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
  case ISD::ADDCARRY:           return visitADDCARRY(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::SUBCARRY:           return visitSUBCARRY(N);
+  case ISD::SMULFIX:
+  case ISD::SMULFIXSAT:
+  case ISD::UMULFIX:
+  case ISD::UMULFIXSAT:         return visitMULFIX(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
@@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
-  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
@@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
     }
   }
 
+  if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
+    // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
+    if (SDValue Carry = getAsCarry(TLI, N0)) {
+      SDValue X = N1;
+      SDValue Zero = DAG.getConstant(0, DL, VT);
+      SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
+      return DAG.getNode(ISD::ADDCARRY, DL,
+                         DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
+                         Carry);
+    }
+  }
+
   return SDValue();
 }
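The identity behind the new (sub Carry, X) fold can be sanity-checked with plain wrapping arithmetic (illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // For Carry in {0,1}: Carry - X == (0 - X) + 0 + Carry (mod 2^32),
  // which is exactly the value the addcarry form computes.
  for (uint32_t Carry = 0; Carry <= 1; ++Carry)
    for (uint32_t X : {0u, 1u, 7u, 0x80000000u, 0xffffffffu})
      assert(Carry - X == (0u - X) + 0u + Carry);
}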
@@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
   return SDValue();
 }
 
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
+// UMULFIXSAT here.
+SDValue DAGCombiner::visitMULFIX(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  SDValue Scale = N->getOperand(2);
+  EVT VT = N0.getValueType();
+
+  // fold (mulfix x, undef, scale) -> 0
+  if (N0.isUndef() || N1.isUndef())
+    return DAG.getConstant(0, SDLoc(N), VT);
+
+  // Canonicalize constant to RHS (vector doesn't have to splat)
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
+
+  // fold (mulfix x, 0, scale) -> 0
+  if (isNullConstant(N1))
+    return DAG.getConstant(0, SDLoc(N), VT);
+
+  return SDValue();
+}
+
 SDValue DAGCombiner::visitMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   // x * 15 --> (x << 4) - x
   // x * -33 --> -((x << 5) + x)
   // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
-  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
     // TODO: We could handle more general decomposition of any constant by
     //       having the target set a limit on number of ops and making a
     //       callback to determine that sequence (similar to sqrt expansion).
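The shift-and-add decompositions listed in the comment above are plain modular-arithmetic identities; a quick standalone check (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 3u, 0x1234u, 0x80000000u, 0xffffffffu}) {
    assert(X * 17u == ((X << 4) + X));          // 17 = 2^4 + 1
    assert(X * 15u == ((X << 4) - X));          // 15 = 2^4 - 1
    assert(X * -33 == 0u - ((X << 5) + X));     // -33 = -(2^5 + 1), wraps
    assert(X * -15 == X - (X << 4));            // -(2^4 - 1) reduces
  }
}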
@@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
 
   if (VT.isVector()) {
     // fold (mulhs x, 0) -> 0
-    if (ISD::isBuildVectorAllZeros(N1.getNode()))
-      return N1;
-    if (ISD::isBuildVectorAllZeros(N0.getNode()))
-      return N0;
+    // do not return N0/N1, because undef node may exist.
+    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+        ISD::isBuildVectorAllZeros(N1.getNode()))
+      return DAG.getConstant(0, DL, VT);
   }
 
   // fold (mulhs x, 0) -> 0
@@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
   // fold (mulhs x, 1) -> (sra x, size(x)-1)
   if (isOneConstant(N1))
     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
-                       DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
+                       DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
                                        getShiftAmountTy(N0.getValueType())));
 
   // fold (mulhs x, undef) -> 0
@@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
 
   if (VT.isVector()) {
     // fold (mulhu x, 0) -> 0
-    if (ISD::isBuildVectorAllZeros(N1.getNode()))
-      return N1;
-    if (ISD::isBuildVectorAllZeros(N0.getNode()))
-      return N0;
+    // do not return N0/N1, because undef node may exist.
+    if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+        ISD::isBuildVectorAllZeros(N1.getNode()))
+      return DAG.getConstant(0, DL, VT);
   }
 
   // fold (mulhu x, 0) -> 0
@@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   EVT VT = N->getValueType(0);
   SDLoc DL(N);
 
+  // (umul_lohi N0, 0) -> (0, 0)
+  if (isNullConstant(N->getOperand(1))) {
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return CombineTo(N, Zero, Zero);
+  }
+
+  // (umul_lohi N0, 1) -> (N0, 0)
+  if (isOneConstant(N->getOperand(1))) {
+    SDValue Zero = DAG.getConstant(0, DL, VT);
+    return CombineTo(N, N->getOperand(0), Zero);
+  }
+
   // If a type twice as wide is legal, transform the mulhu to a wider
   // multiply plus a shift.
   if (VT.isSimple() && !VT.isVector()) {
@@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
 }
 
 SDValue DAGCombiner::visitMULO(SDNode *N) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
   bool IsSigned = (ISD::SMULO == N->getOpcode());
 
+  EVT CarryVT = N->getValueType(1);
+  SDLoc DL(N);
+
+  // canonicalize constant to RHS.
+  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+    return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+  // fold (mulo x, 0) -> 0 + no carry out
+  if (isNullOrNullSplat(N1))
+    return CombineTo(N, DAG.getConstant(0, DL, VT),
+                     DAG.getConstant(0, DL, CarryVT));
+
   // (mulo x, 2) -> (addo x, x)
-  if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
+  if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
     if (C2->getAPIntValue() == 2)
-      return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
-                         N->getVTList(), N->getOperand(0), N->getOperand(0));
+      return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+                         N->getVTList(), N0, N0);
 
   return SDValue();
 }
@@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
       Level <= AfterLegalizeTypes) {
     // Input types must be integer and the same.
-    if (XVT.isInteger() && XVT == Y.getValueType()) {
+    if (XVT.isInteger() && XVT == Y.getValueType() &&
+        !(VT.isVector() && TLI.isTypeLegal(VT) &&
+          !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
       return DAG.getNode(HandOpcode, DL, VT, Logic);
     }
@@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
     return true;
   }
 
-  // Do not change the width of a volatile load.
-  if (LoadN->isVolatile())
+  // Do not change the width of volatile or atomic loads.
+  if (!LoadN->isSimple())
     return false;
 
   // Do not generate loads of non-round integer types since these can
@@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
   if (!MemVT.isRound())
     return false;
 
-  // Don't change the width of a volatile load.
-  if (LDST->isVolatile())
+  // Don't change the width of volatile or atomic loads.
+  if (!LDST->isSimple())
     return false;
 
   // Verify that we are actually reducing a load width here.
   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
     return false;
 
-  // Ensure that this isn't going to produce an unsupported unaligned access.
+  // Ensure that this isn't going to produce an unsupported memory access.
   if (ShAmt &&
       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
                               LDST->getAddressSpace(), ShAmt / 8,
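The next hunk adds combineShiftAnd1ToBitTest. The scalar identity it rewrites, and (not (srl X, C)), 1 --> (and X, 1<<C) == 0, checks out in plain C++ (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0x5au, 0x80000000u, 0xffffffffu})
    for (unsigned C = 0; C < 32; ++C) {
      uint32_t Lhs = (~(X >> C)) & 1u;                 // shift + not + mask
      uint32_t Rhs = ((X & (1u << C)) == 0) ? 1u : 0u; // mask + compare-to-zero
      assert(Lhs == Rhs);
    }
}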
@@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
   return T1;
 }
 
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and
+/// save at least 1 instruction.
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+  assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+  // This is probably not worthwhile without a supported type.
+  EVT VT = And->getValueType(0);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isTypeLegal(VT))
+    return SDValue();
+
+  // Look through an optional extension and find a 'not'.
+  // TODO: Should we favor test+set even without the 'not' op?
+  SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
+  if (Not.getOpcode() == ISD::ANY_EXTEND)
+    Not = Not.getOperand(0);
+  if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+    return SDValue();
+
+  // Look through an optional truncation. The source operand may not be the
+  // same type as the original 'and', but that is ok because we are masking
+  // off everything but the low bit.
+  SDValue Srl = Not.getOperand(0);
+  if (Srl.getOpcode() == ISD::TRUNCATE)
+    Srl = Srl.getOperand(0);
+
+  // Match a shift-right by constant.
+  if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
+      !isa<ConstantSDNode>(Srl.getOperand(1)))
+    return SDValue();
+
+  // We might have looked through casts that make this transform invalid.
+  // TODO: If the source type is wider than the result type, do the mask and
+  //       compare in the source type.
+  const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
+  unsigned VTBitWidth = VT.getSizeInBits();
+  if (ShiftAmt.uge(VTBitWidth))
+    return SDValue();
+
+  // Turn this into a bit-test pattern using mask op + setcc:
+  // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+  SDLoc DL(And);
+  SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
+  EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue Mask = DAG.getConstant(
+      APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
+  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
+  return DAG.getZExtOrTrunc(Setcc, DL, VT);
+}
+
 SDValue DAGCombiner::visitAND(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return SDValue(N, 0); // Return N so it doesn't get rechecked!
     }
   }
+
   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
   // already be zero by virtue of the width of the base type of the load.
@@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     unsigned MemBitSize = MemVT.getScalarSizeInBits();
     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
-        ((!LegalOperations && !LN0->isVolatile()) ||
+        ((!LegalOperations && LN0->isSimple()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
       SDValue ExtLoad =
           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
@@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
     return Shifts;
 
+  if (TLI.hasBitTest(N0, N1))
+    if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+      return V;
+
   return SDValue();
 }
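The hunks below rework matching of the packed-halfword-bswap pattern. The expression in the doc comment is equivalent to rotl(bswap(x), 16), which can be verified directly (helper names bswap32/rotl32 are mine, not from the patch):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X >> 24) | ((X >> 8) & 0xff00u) | ((X << 8) & 0xff0000u) | (X << 24);
}

static uint32_t rotl32(uint32_t X, unsigned S) {
  return (X << S) | (X >> (32 - S)); // valid here for S in 1..31
}

int main() {
  for (uint32_t X : {0u, 0x11223344u, 0xdeadbeefu, 0xffffffffu}) {
    uint32_t Packed = ((X & 0x000000ffu) << 8) | ((X & 0x0000ff00u) >> 8) |
                      ((X & 0x00ff0000u) << 8) | ((X & 0xff000000u) >> 8);
    assert(Packed == rotl32(bswap32(X), 16));
  }
}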
@@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
   return true;
 }
 
+// Match 2 elements of a packed halfword bswap.
+static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
+  if (N.getOpcode() == ISD::OR)
+    return isBSwapHWordElement(N.getOperand(0), Parts) &&
+           isBSwapHWordElement(N.getOperand(1), Parts);
+
+  if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
+    ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
+    if (!C || C->getAPIntValue() != 16)
+      return false;
+    Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
+    return true;
+  }
+
+  return false;
+}
+
 /// Match a 32-bit packed halfword bswap. That is
 /// ((x & 0x000000ff) << 8) |
 /// ((x & 0x0000ff00) >> 8) |
@@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
     return SDValue();
 
   // Look for either
-  // (or (or (and), (and)), (or (and), (and)))
-  // (or (or (or (and), (and)), (and)), (and))
-  if (N0.getOpcode() != ISD::OR)
-    return SDValue();
-  SDValue N00 = N0.getOperand(0);
-  SDValue N01 = N0.getOperand(1);
+  // (or (bswaphpair), (bswaphpair))
+  // (or (or (bswaphpair), (and)), (and))
+  // (or (or (and), (bswaphpair)), (and))
   SDNode *Parts[4] = {};
 
-  if (N1.getOpcode() == ISD::OR &&
-      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
+  if (isBSwapHWordPair(N0, Parts)) {
     // (or (or (and), (and)), (or (and), (and)))
-    if (!isBSwapHWordElement(N00, Parts))
+    if (!isBSwapHWordPair(N1, Parts))
       return SDValue();
-
-    if (!isBSwapHWordElement(N01, Parts))
-      return SDValue();
-    SDValue N10 = N1.getOperand(0);
-    if (!isBSwapHWordElement(N10, Parts))
-      return SDValue();
-    SDValue N11 = N1.getOperand(1);
-    if (!isBSwapHWordElement(N11, Parts))
-      return SDValue();
-  } else {
+  } else if (N0.getOpcode() == ISD::OR) {
     // (or (or (or (and), (and)), (and)), (and))
     if (!isBSwapHWordElement(N1, Parts))
       return SDValue();
-    if (!isBSwapHWordElement(N01, Parts))
-      return SDValue();
-    if (N00.getOpcode() != ISD::OR)
-      return SDValue();
-    SDValue N000 = N00.getOperand(0);
-    if (!isBSwapHWordElement(N000, Parts))
-      return SDValue();
-    SDValue N001 = N00.getOperand(1);
-    if (!isBSwapHWordElement(N001, Parts))
+    SDValue N00 = N0.getOperand(0);
+    SDValue N01 = N0.getOperand(1);
+    if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
+        !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
      return SDValue();
-  }
+  } else
+    return SDValue();
 
   // Make sure the parts are all coming from the same node.
   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
@@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
         SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
         SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
 
-        bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
-        if (!LegalMask) {
-          std::swap(NewLHS, NewRHS);
-          ShuffleVectorSDNode::commuteMask(Mask);
-          LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
-        }
-
-        if (LegalMask)
-          return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
+        SDValue LegalShuffle =
+            TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
+                                        Mask, DAG);
+        if (LegalShuffle)
+          return LegalShuffle;
       }
     }
   }
@@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     return V;
 
   // See if this is some rotate idiom.
-  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
-    return SDValue(Rot, 0);
+  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+    return Rot;
 
   if (SDValue Load = MatchLoadCombine(N))
     return Load;
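The next hunks teach extractShiftForRotate about (add v v). That case relies on add v v == shl v 1, so or-ing it with srl v, bitwidth-1 is a rotate-left-by-1; a quick check (illustrative, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t V : {0u, 1u, 0x80000001u, 0xdeadbeefu, 0xffffffffu}) {
    uint32_t Rotl1 = (V << 1) | (V >> 31);
    assert(((V + V) | (V >> 31)) == Rotl1); // add-to-self == shl by 1
  }
}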
@@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
 /// patterns:
 ///
+/// (or (add v v) (shrl v bitwidth-1)):
+///   expands (add v v) -> (shl v 1)
+///
 /// (or (mul v c0) (shrl (mul v c1) c2)):
 ///   expands (mul v c0) -> (shl (mul v c1) c3)
 ///
@@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
          "Existing shift must be valid as a rotate half");
 
   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+
+  // Value and Type of the shift.
+  SDValue OppShiftLHS = OppShift.getOperand(0);
+  EVT ShiftedVT = OppShiftLHS.getValueType();
+
+  // Amount of the existing shift.
+  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+
+  // (add v v) -> (shl v 1)
+  if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
+      ExtractFrom.getOpcode() == ISD::ADD &&
+      ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
+      ExtractFrom.getOperand(0) == OppShiftLHS &&
+      OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
+    return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
+                       DAG.getShiftAmountConstant(1, ShiftedVT, DL));
+
   // Preconditions:
   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
   //
@@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
 
   // op0 must be the same opcode on both sides, have the same LHS argument,
   // and produce the same value type.
-  SDValue OppShiftLHS = OppShift.getOperand(0);
-  EVT ShiftedVT = OppShiftLHS.getValueType();
   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
       ShiftedVT != ExtractFrom.getValueType())
     return SDValue();
 
-  // Amount of the existing shift.
-  ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
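For context on the rotate matching below: matchRotateSub accepts shift-amount pairs of the form (S, bitwidth - S), which is exactly the classic rotate idiom (illustrative check; rotl32 is my helper, not from the patch):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, unsigned S) {
  return (X << (S & 31)) | (X >> ((32 - S) & 31));
}

int main() {
  for (uint32_t X : {1u, 0x12345678u, 0xffffffffu})
    for (unsigned S = 1; S < 32; ++S)
      assert(((X << S) | (X >> (32 - S))) == rotl32(X, S));
}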
@@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
 // former being preferred if supported. InnerPos and InnerNeg are Pos and
 // Neg with outer conversions stripped away.
-SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                        SDValue Neg, SDValue InnerPos,
                                        SDValue InnerNeg, unsigned PosOpcode,
                                        unsigned NegOpcode, const SDLoc &DL) {
@@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
-                       HasPos ? Pos : Neg).getNode();
+                       HasPos ? Pos : Neg);
   }
 
-  return nullptr;
+  return SDValue();
 }
 
 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
 // idioms for rotate, and if the target supports rotation instructions,
 // generate a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
+SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
   EVT VT = LHS.getValueType();
-  if (!TLI.isTypeLegal(VT)) return nullptr;
+  if (!TLI.isTypeLegal(VT))
+    return SDValue();
 
   // The target must have at least one rotate flavor.
   bool HasROTL = hasOperation(ISD::ROTL, VT);
   bool HasROTR = hasOperation(ISD::ROTR, VT);
-  if (!HasROTL && !HasROTR) return nullptr;
+  if (!HasROTL && !HasROTR)
+    return SDValue();
 
   // Check for truncated rotate.
   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
      LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
    assert(LHS.getValueType() == RHS.getValueType());
-    if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
-      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
-                         SDValue(Rot, 0)).getNode();
+    if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+      return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
     }
   }
 
@@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
 
   // If neither side matched a rotate half, bail
   if (!LHSShift && !RHSShift)
-    return nullptr;
+    return SDValue();
 
   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
   // side of the rotate, so try to handle that here. In all cases we need to
@@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
 
   // If a side is still missing, nothing else we can do.
   if (!RHSShift || !LHSShift)
-    return nullptr;
+    return SDValue();
 
   // At this point we've matched or extracted a shift op on each side.
 
   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
-    return nullptr; // Not shifting the same value.
+    return SDValue(); // Not shifting the same value.
 
   if (LHSShift.getOpcode() == RHSShift.getOpcode())
-    return nullptr; // Shifts must disagree.
+    return SDValue(); // Shifts must disagree.
 
   // Canonicalize shl to left side in a shl/srl pair.
   if (RHSShift.getOpcode() == ISD::SHL) {
@@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
     }
 
-    return Rot.getNode();
+    return Rot;
   }
 
   // If there is a mask here, and we have a variable shift, we can't be sure
   // that we're masking out the right stuff.
   if (LHSMask.getNode() || RHSMask.getNode())
-    return nullptr;
+    return SDValue();
 
   // If the shift amount is sign/zext/any-extended just peel it off.
   SDValue LExtOp0 = LHSShiftAmt;
@@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
     RExtOp0 = RHSShiftAmt.getOperand(0);
   }
 
-  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+  SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
   if (TryL)
     return TryL;
 
-  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+  SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
   if (TryR)
     return TryR;
 
-  return nullptr;
+  return SDValue();
 }
 
 namespace {
@@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
                                  Depth + 1);
   case ISD::LOAD: {
     auto L = cast<LoadSDNode>(Op.getNode());
-    if (L->isVolatile() || L->isIndexed())
+    if (!L->isSimple() || L->isIndexed())
       return None;
 
     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
@@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
   SDValue Chain;
   SmallVector<StoreSDNode *, 8> Stores;
   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+    // TODO: Allow unordered atomics when wider type is legal (see D66309)
     if (Store->getMemoryVT() != MVT::i8 ||
-        Store->isVolatile() || Store->isIndexed())
+        !Store->isSimple() || Store->isIndexed())
       return SDValue();
     Stores.push_back(Store);
     Chain = Store->getChain();
@@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
     return SDValue();
 
   LoadSDNode *L = P->Load;
-  assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+  assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
+         !L->isIndexed() &&
         "Must be enforced by calculateByteProvider");
   assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
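The next hunks rename LHS/RHS to N00/N01 in visitXOR's De Morgan folds; the identities being applied are the usual ones (illustrative check, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // fold (not (or x, y))  -> (and (not x), (not y))
  // fold (not (and x, y)) -> (or (not x), (not y))
  for (uint32_t X : {0u, 0x0f0fu, 0xffffffffu})
    for (uint32_t Y : {0u, 0x3333u, 0x80000000u}) {
      assert(~(X | Y) == (~X & ~Y));
      assert(~(X & Y) == (~X | ~Y));
    }
}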
@@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
-    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
-    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+    if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
-      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
-      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
-      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
-      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
     }
   }
 
   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
-    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
-    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+    SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+    if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
-      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
-      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
-      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
-      return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+      N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+      N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+      AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+      return DAG.getNode(NewOpcode, DL, VT, N00, N01);
     }
   }
@@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   return SDValue();
 }
 
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This is a
+/// throughput improvement.
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+  // Match a one-use bitwise logic op.
+  SDValue LogicOp = Shift->getOperand(0);
+  if (!LogicOp.hasOneUse())
+    return SDValue();
+
+  unsigned LogicOpcode = LogicOp.getOpcode();
+  if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+      LogicOpcode != ISD::XOR)
+    return SDValue();
+
+  // Find a matching one-use shift by constant.
+  unsigned ShiftOpcode = Shift->getOpcode();
+  SDValue C1 = Shift->getOperand(1);
+  ConstantSDNode *C1Node = isConstOrConstSplat(C1);
+  assert(C1Node && "Expected a shift with constant operand");
+  const APInt &C1Val = C1Node->getAPIntValue();
+  auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
+                             const APInt *&ShiftAmtVal) {
+    if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+      return false;
+
+    ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
+    if (!ShiftCNode)
+      return false;
+
+    // Capture the shifted operand and shift amount value.
+    ShiftOp = V.getOperand(0);
+    ShiftAmtVal = &ShiftCNode->getAPIntValue();
+
+    // Shift amount types do not have to match their operand type, so check
+    // that the constants are the same width.
+    if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
+      return false;
+
+    // The fold is not valid if the sum of the shift values exceeds bitwidth.
+    if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+      return false;
+
+    return true;
+  };
+
+  // Logic ops are commutative, so check each operand for a match.
+  SDValue X, Y;
+  const APInt *C0Val;
+  if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
+    Y = LogicOp.getOperand(1);
+  else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
+    Y = LogicOp.getOperand(0);
+  else
+    return SDValue();
+
+  // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+  SDLoc DL(Shift);
+  EVT VT = Shift->getValueType(0);
+  EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+  SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
+  SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
+  SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
+  return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
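The identity behind combineShiftOfShiftedLogic above follows from shifts distributing over the bitwise logic ops, provided C0+C1 stays below the bit width (illustrative check, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // shift (logic (shift X, C0), Y), C1
  //   -> logic (shift X, C0+C1), (shift Y, C1)
  for (uint32_t X : {1u, 0xdeadbeefu})
    for (uint32_t Y : {0u, 0x12345u})
      for (unsigned C0 = 0; C0 < 16; ++C0)
        for (unsigned C1 = 0; C0 + C1 < 32; ++C1) {
          assert((((X << C0) | Y) << C1) == ((X << (C0 + C1)) | (Y << C1)));
          assert((((X << C0) ^ Y) << C1) == ((X << (C0 + C1)) ^ (Y << C1)));
        }
}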
 /// Handle transforms common to the three shifts, when the shift amount is a
 /// constant.
 /// We are looking for: (shift being one of shl/sra/srl)
 ///   shift (binop X, C0), C1
 /// And want to transform into:
 ///   binop (shift X, C1), (shift C0, C1)
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
+  assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
+
   // Do not turn a 'not' into a regular xor.
   if (isBitwiseNot(N->getOperand(0)))
     return SDValue();
 
   // The inner binop must be one-use, since we want to replace it.
-  SDNode *LHS = N->getOperand(0).getNode();
-  if (!LHS->hasOneUse()) return SDValue();
+  SDValue LHS = N->getOperand(0);
+  if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
+    return SDValue();
+
+  // TODO: This is limited to early combining because it may reveal regressions
+  //       otherwise. But since we just checked a target hook to see if this is
+  //       desirable, that should have filtered out cases where this interferes
+  //       with some other pattern matching.
+  if (!LegalTypes)
+    if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+      return R;
 
   // We want to pull some binops through shifts, so that we have (and (shift))
   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   // thing happens with address calculations, so it's important to canonicalize
   // it.
-  switch (LHS->getOpcode()) {
+  switch (LHS.getOpcode()) {
   default:
     return SDValue();
   case ISD::OR:
@@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   }
 
   // We require the RHS of the binop to be a constant and not opaque as well.
-  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+  ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
   if (!BinOpCst)
     return SDValue();
 
   // FIXME: disable this unless the input to the binop is a shift by a
   // constant or is copy/select. Enable this in other cases when we figure
   // out it's exactly profitable.
-  SDValue BinOpLHSVal = LHS->getOperand(0);
+  SDValue BinOpLHSVal = LHS.getOperand(0);
   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
                             BinOpLHSVal.getOpcode() == ISD::SRA ||
                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
@@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   if (IsCopyOrSelect && N->hasOneUse())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
-
-  if (!TLI.isDesirableToCommuteWithShift(N, Level))
-    return SDValue();
-
   // Fold the constants, shifting the binop RHS by the shift amount.
-  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
-                               N->getValueType(0),
-                               LHS->getOperand(1), N->getOperand(1));
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
+                               N->getOperand(1));
   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
 
-  // Create the new shift.
-  SDValue NewShift = DAG.getNode(N->getOpcode(),
-                                 SDLoc(LHS->getOperand(0)),
-                                 VT, LHS->getOperand(0), N->getOperand(1));
-
-  // Create the new binop.
-  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
+  SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+                                 N->getOperand(1));
+  return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
 }
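visitShiftByConstant pulls a binop through a shift; for or/add with a constant RHS this is the distribution identity named in the doc comment (illustrative check, wrapping arithmetic assumed):

#include <cassert>
#include <cstdint>

int main() {
  // shift (binop X, C0), C1 -> binop (shift X, C1), (shift C0, C1)
  const uint32_t C0 = 0x00ff00ffu;
  for (uint32_t X : {0u, 0x12345678u, 0xffffffffu})
    for (unsigned C1 = 0; C1 < 32; ++C1) {
      assert(((X | C0) << C1) == ((X << C1) | (C0 << C1)));
      assert(((X + C0) << C1) == ((X << C1) + (C0 << C1))); // mod 2^32
    }
}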
 
 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
@@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   }
 
   if (N1C && !N1C->isOpaque())
-    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+    if (SDValue NewSHL = visitShiftByConstant(N))
      return NewSHL;
 
   return SDValue();
@@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     }
   }
 
+  // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+  //   sra (add (shl X, N1C), AddC), N1C -->
+  //   sext (add (trunc X to (width - N1C)), AddC')
+  if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+      N0.getOperand(0).getOpcode() == ISD::SHL &&
+      N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+    if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+      SDValue Shl = N0.getOperand(0);
+      // Determine what the truncate's type would be and ask the target if
+      // that is a free operation.
+      LLVMContext &Ctx = *DAG.getContext();
+      unsigned ShiftAmt = N1C->getZExtValue();
+      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+      if (VT.isVector())
+        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+      // TODO: The simple type check probably belongs in the default hook
+      //       implementation and/or target-specific overrides (because
+      //       non-simple types likely require masking when legalized), but
+      //       that restriction may conflict with other transforms.
+      if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+        SDLoc DL(N);
+        SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+        SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+                             trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+        SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+        return DAG.getSExtOrTrunc(Add, DL, VT);
+      }
+    }
+  }
+
   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
       N1.getOperand(0).getOpcode() == ISD::AND) {
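The new sra-of-add-of-shl fold above narrows the add into the truncated type. A check of the arithmetic with N1C = 16 on i32 (illustrative; assumes arithmetic right shift, as mainstream compilers provide and C++20 guarantees):

#include <cassert>
#include <cstdint>

int main() {
  // sra (add (shl X, 16), AddC), 16 == sext16(add(trunc16(X), AddC lshr 16))
  for (int32_t X : {0, 1, -1, 32767, -32768, 123456})
    for (int32_t AddC : {0, 0x1234, 0x10000, -0x230000, 0x7fff0000}) {
      int32_t Wide = (int32_t)(((uint32_t)X << 16) + (uint32_t)AddC) >> 16;
      int16_t Narrow =
          (int16_t)((int16_t)X + (int16_t)((uint32_t)AddC >> 16));
      assert(Wide == (int32_t)Narrow);
    }
}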
@@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
 
   if (N1C && !N1C->isOpaque())
-    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
+    if (SDValue NewSRA = visitShiftByConstant(N))
       return NewSRA;
 
   return SDValue();
@@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
       return SDValue(N, 0);
 
   if (N1C && !N1C->isOpaque())
-    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
+    if (SDValue NewSRL = visitShiftByConstant(N))
      return NewSRL;
 
   // Attempt to convert a srl of a load into a narrower zero-extending load.
@@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
   }
 }
 
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+  SDValue Cond = N->getOperand(0);
+  SDValue C1 = N->getOperand(1);
+  SDValue C2 = N->getOperand(2);
+  assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+         "Expected select-of-constants");
+
+  EVT VT = N->getValueType(0);
+  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+      VT != Cond.getOperand(0).getValueType())
+    return SDValue();
+
+  // The inverted-condition + commuted-select variants of these patterns are
+  // canonicalized to these forms in IR.
+  SDValue X = Cond.getOperand(0);
+  SDValue CondC = Cond.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+      isAllOnesOrAllOnesSplat(C2)) {
+    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+  }
+  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
   SDValue Cond = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
     return Cond;
   }
 
-  // For any constants that differ by 1, we can transform the select into an
-  // extend and add. Use a target hook because some targets may prefer to
-  // transform in the other direction.
+  // Use a target hook because some targets may prefer to transform in the
+  // other direction.
   if (TLI.convertSelectOfConstantsToMath(VT)) {
-    if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+    // For any constants that differ by 1, we can transform the select into
+    // an extend and add.
+    const APInt &C1Val = C1->getAPIntValue();
+    const APInt &C2Val = C2->getAPIntValue();
+    if (C1Val - 1 == C2Val) {
       // select Cond, C1, C1-1 --> add (zext Cond), C1-1
      if (VT != MVT::i1)
         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
       return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
     }
-    if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+    if (C1Val + 1 == C2Val) {
       // select Cond, C1, C1+1 --> add (sext Cond), C1+1
       if (VT != MVT::i1)
         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
       return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
     }
+
+    // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+    if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+      if (VT != MVT::i1)
+        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+      SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
+      return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
+    }
+
+    if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+      return V;
   }
 
   return SDValue();
 }
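The select-of-constants folds above are all small integer identities; a combined check (illustrative; the sign-mask forms assume arithmetic right shift):

#include <cassert>
#include <cstdint>

int main() {
  const int32_t C1 = 41;
  for (int32_t X : {-5, -1, 0, 1, 99}) {
    bool Cond = X != 0;
    // select Cond, C1, C1-1 --> add (zext Cond), C1-1
    assert((Cond ? C1 : C1 - 1) == (int32_t)Cond + (C1 - 1));
    // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
    assert((Cond ? 16 : 0) == ((int32_t)Cond << 4));
    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
    assert((X > -1 ? C1 : -1) == ((X >> 31) | C1));
    // i32 X < 0 ? C1 : 0 --> (X >>s 31) & C1
    assert((X < 0 ? C1 : 0) == ((X >> 31) & C1));
  }
}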
@@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
   return SDValue();
 }
 
-static
-std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
-  SDLoc DL(N);
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
-  // Split the inputs.
-  SDValue Lo, Hi, LL, LH, RL, RH;
-  std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
-  std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
-  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
-  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
-  return std::make_pair(Lo, Hi);
-}
-
 // This function assumes all the vselect's arguments are CONCAT_VECTOR
 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
@@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue Mask = MSC->getMask();
-  SDValue Data = MSC->getValue();
   SDValue Chain = MSC->getChain();
   SDLoc DL(N);
 
@@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return Chain;
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MSCATTER data type requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-  if (Mask.getOpcode() != ISD::SETCC)
-    return SDValue();
-
-  // Check if any splitting is required.
-  if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
-      TargetLowering::TypeSplitVector)
-    return SDValue();
-
-  SDValue MaskLo, MaskHi;
-  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
-
-  EVT MemoryVT = MSC->getMemoryVT();
-  unsigned Alignment = MSC->getOriginalAlignment();
-
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue DataLo, DataHi;
-  std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
-  SDValue Scale = MSC->getScale();
-  SDValue BasePtr = MSC->getBasePtr();
-  SDValue IndexLo, IndexHi;
-  std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MSC->getPointerInfo(),
-                         MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
-                         Alignment, MSC->getAAInfo(), MSC->getRanges());
-
-  SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
-  SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
-                                    DataLo.getValueType(), DL, OpsLo, MMO);
-
-  // The order of the Scatter operation after split is well defined. The "Hi"
-  // part comes after the "Lo". So these two operations should be chained one
-  // after another.
-  SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
-  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
-                              DL, OpsHi, MMO);
+  return SDValue();
 }
 
 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
-  SDValue Data = MST->getValue();
   SDValue Chain = MST->getChain();
-  EVT VT = Data.getValueType();
   SDLoc DL(N);
 
   // Zap masked stores with a zero mask.
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return Chain;
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MSTORE data type requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - if (Mask.getOpcode() == ISD::SETCC) { - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue Ptr = MST->getBasePtr(); - - EVT MemoryVT = MST->getMemoryVT(); - unsigned Alignment = MST->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue DataLo, DataHi; - std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). - getMachineMemOperand(MST->getPointerInfo(), - MachineMemOperand::MOStore, LoMemVT.getStoreSize(), - Alignment, MST->getAAInfo(), MST->getRanges()); - - Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, - MST->isCompressingStore()); - unsigned HiOffset = LoMemVT.getStoreSize(); - - MMO = DAG.getMachineFunction().getMachineMemOperand( - MST->getPointerInfo().getWithOffset(HiOffset), - MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment, - MST->getAAInfo(), MST->getRanges()); - - Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, - MST->isTruncatingStore(), - MST->isCompressingStore()); - - AddToWorklist(Lo.getNode()); - AddToWorklist(Hi.getNode()); - - return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); - } return SDValue(); } @@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (ISD::isBuildVectorAllZeros(Mask.getNode())) return CombineTo(N, MGT->getPassThru(), MGT->getChain()); - if (Level >= AfterLegalizeTypes) - return SDValue(); - - // If the MGATHER result requires splitting and the mask is provided by a - // SETCC, then split both nodes and its operands before legalization. This - // prevents the type legalizer from unrolling SETCC into scalar comparisons - // and enables future optimizations (e.g. min/max pattern matching on X86). - - if (Mask.getOpcode() != ISD::SETCC) - return SDValue(); - - EVT VT = N->getValueType(0); - - // Check if any splitting is required. - if (TLI.getTypeAction(*DAG.getContext(), VT) != - TargetLowering::TypeSplitVector) - return SDValue(); - - SDValue MaskLo, MaskHi, Lo, Hi; - std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); - - SDValue PassThru = MGT->getPassThru(); - SDValue PassThruLo, PassThruHi; - std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL); - - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); - - SDValue Chain = MGT->getChain(); - EVT MemoryVT = MGT->getMemoryVT(); - unsigned Alignment = MGT->getOriginalAlignment(); - - EVT LoMemVT, HiMemVT; - std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); - - SDValue Scale = MGT->getScale(); - SDValue BasePtr = MGT->getBasePtr(); - SDValue Index = MGT->getIndex(); - SDValue IndexLo, IndexHi; - std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); - - MachineMemOperand *MMO = DAG.getMachineFunction(). 
@@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MGATHER result requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-
-  if (Mask.getOpcode() != ISD::SETCC)
-    return SDValue();
-
-  EVT VT = N->getValueType(0);
-
-  // Check if any splitting is required.
-  if (TLI.getTypeAction(*DAG.getContext(), VT) !=
-      TargetLowering::TypeSplitVector)
-    return SDValue();
-
-  SDValue MaskLo, MaskHi, Lo, Hi;
-  std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-  SDValue PassThru = MGT->getPassThru();
-  SDValue PassThruLo, PassThruHi;
-  std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
-  EVT LoVT, HiVT;
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
-  SDValue Chain = MGT->getChain();
-  EVT MemoryVT = MGT->getMemoryVT();
-  unsigned Alignment = MGT->getOriginalAlignment();
-
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue Scale = MGT->getScale();
-  SDValue BasePtr = MGT->getBasePtr();
-  SDValue Index = MGT->getIndex();
-  SDValue IndexLo, IndexHi;
-  std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().
-    getMachineMemOperand(MGT->getPointerInfo(),
-                         MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
-                         Alignment, MGT->getAAInfo(), MGT->getRanges());
-
-  SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
-  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
-                           MMO);
-
-  SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
-  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
-                           MMO);
-
-  AddToWorklist(Lo.getNode());
-  AddToWorklist(Hi.getNode());
-
-  // Build a factor node to remember that this load is independent of the
-  // other one.
-  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
-                      Hi.getValue(1));
-
-  // Legalized the chain result - switch anything that used the old chain to
-  // use the new one.
-  DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
-
-  SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
-  SDValue RetOps[] = { GatherRes, Chain };
-  return DAG.getMergeValues(RetOps, DL);
+  return SDValue();
 }
 
 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
@@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
 
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
-  // If the MLOAD result requires splitting and the mask is provided by a
-  // SETCC, then split both nodes and its operands before legalization. This
-  // prevents the type legalizer from unrolling SETCC into scalar comparisons
-  // and enables future optimizations (e.g. min/max pattern matching on X86).
-  if (Mask.getOpcode() == ISD::SETCC) {
-    EVT VT = N->getValueType(0);
-
-    // Check if any splitting is required.
-    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
-        TargetLowering::TypeSplitVector)
-      return SDValue();
-
-    SDValue MaskLo, MaskHi, Lo, Hi;
-    std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
-    SDValue PassThru = MLD->getPassThru();
-    SDValue PassThruLo, PassThruHi;
-    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
-    EVT LoVT, HiVT;
-    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
-    SDValue Chain = MLD->getChain();
-    SDValue Ptr = MLD->getBasePtr();
-    EVT MemoryVT = MLD->getMemoryVT();
-    unsigned Alignment = MLD->getOriginalAlignment();
-
-    EVT LoMemVT, HiMemVT;
-    std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-    MachineMemOperand *MMO = DAG.getMachineFunction().
-      getMachineMemOperand(MLD->getPointerInfo(),
-                           MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
-                           Alignment, MLD->getAAInfo(), MLD->getRanges());
-
-    Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
-                           MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
-    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
-                                     MLD->isExpandingLoad());
-    unsigned HiOffset = LoMemVT.getStoreSize();
-
-    MMO = DAG.getMachineFunction().getMachineMemOperand(
-        MLD->getPointerInfo().getWithOffset(HiOffset),
-        MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
-        MLD->getAAInfo(), MLD->getRanges());
-
-    Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
-                           MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
-    AddToWorklist(Lo.getNode());
-    AddToWorklist(Hi.getNode());
-
-    // Build a factor node to remember that this load is independent of the
-    // other one.
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), - Hi.getValue(1)); - - // Legalized the chain result - switch anything that used the old chain to - // use the new one. - DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); - - SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); - - SDValue RetOps[] = { LoadRes, Chain }; - return DAG.getMergeValues(RetOps, DL); - } return SDValue(); } @@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2); } + // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C) + APInt Pow2C; + if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() && + isNullOrNullSplat(N2)) { + SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC); + } + + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + // The general case for select-of-constants: // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2 // ...but that only makes sense if a vselect is slower than 2 logic ops, so @@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); if (isAbs) { - EVT VT = LHS.getValueType(); if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) return DAG.getNode(ISD::ABS, DL, VT, LHS); - SDValue Shift = DAG.getNode( - ISD::SRA, DL, VT, LHS, - DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); + SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarSizeInBits() - 1, + DL, getShiftAmountTy(VT))); SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); AddToWorklist(Shift.getNode()); AddToWorklist(Add.getNode()); @@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // This is OK if we don't care about what happens if either operand is a // NaN. 
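// Illustrative instance (assumes the no-NaN semantics noted above; operands
// are hypothetical):
//   vselect (setcc X, Y, setolt), X, Y --> fminnum X, Y
//   vselect (setcc X, Y, setogt), X, Y --> fmaxnum X, Y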
// - if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), - N0.getOperand(1), TLI)) { - if (SDValue FMinMax = combineMinNumMaxNum( - DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG)) + if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) return FMinMax; } @@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || - !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || - !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + !N0.hasOneUse() || !LN0->isSimple() || + !DstVT.isVector() || !DstVT.isPow2VectorType() || + !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) return SDValue(); SmallVector<SDNode *, 4> SetCCs; @@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner, LoadSDNode *LN0 = cast<LoadSDNode>(N0); EVT MemVT = LN0->getMemoryVT(); - if ((LegalOperations || LN0->isVolatile() || VT.isVector()) && + if ((LegalOperations || !LN0->isSimple() || + VT.isVector()) && !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT)) return SDValue(); @@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, if (!ISD::isNON_EXTLoad(N0.getNode()) || !ISD::isUNINDEXEDLoad(N0.getNode()) || ((LegalOperations || VT.isVector() || - cast<LoadSDNode>(N0)->isVolatile()) && + !cast<LoadSDNode>(N0)->isSimple()) && !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType()))) return {}; @@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, return SDValue(N, 0); // Return N so it doesn't get rechecked! } +static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, + const TargetLowering &TLI, EVT VT, + SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, + ISD::NodeType ExtOpc) { + if (!N0.hasOneUse()) + return SDValue(); + + MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0); + if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + return SDValue(); + + if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SDLoc dl(Ld); + SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru()); + SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(), + Ld->getBasePtr(), Ld->getMask(), + PassThru, Ld->getMemoryVT(), + Ld->getMemOperand(), ExtLoadType, + Ld->isExpandingLoad()); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1)); + return NewLoad; +} + static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG, bool LegalOperations) { assert((N->getOpcode() == ISD::SIGN_EXTEND || @@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, + ISD::SIGN_EXTEND)) + return foldedExt; + // fold (sext (load x)) to multiple smaller sextloads. // Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; + if (SDValue foldedExt = + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, + ISD::ZERO_EXTEND)) + return foldedExt; + // fold (zext (load x)) to multiple smaller zextloads. 
// Only on illegal but splittable vectors. if (SDValue ExtLoad = CombineExtLoad(N)) @@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) + // Reducing the width of a volatile load is illegal. For atomics, we may be + // able to reduce the width provided we never widen again. (see D66309) + if (!LN0->isSimple() || + !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt)) return SDValue(); auto AdjustBigEndianShift = [&](unsigned ShAmt) { @@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { SDValue Load; if (ExtType == ISD::NON_EXTLOAD) - Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); else - Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr, + Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT, NewAlign, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); @@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // no larger than the source) then the useful bits of the result are // zero; we can't simply return the shortened shift, because the result // of that operation is undefined. - SDLoc DL(N0); if (ShLeftAmt >= VT.getSizeInBits()) Result = DAG.getConstant(0, DL, VT); else @@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() && + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() && N0.hasOneUse()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse() && EVT == cast<LoadSDNode>(N0)->getMemoryVT() && - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) && TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, @@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // after truncation. if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - if (!LN0->isVolatile() && + if (LN0->isSimple() && LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), @@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. - ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) || TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { for (int i = 0; i != MaskScale; ++i) NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); - bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - if (!LegalMask) { - std::swap(SV0, SV1); - ShuffleVectorSDNode::commuteMask(NewMask); - LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); - } - - if (LegalMask) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; } return SDValue(); @@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math) ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true); if (N1C && N1C->isZero()) - if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros()) + if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && - isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2) - return DAG.getNode(ISD::FSUB, DL, VT, N1, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), Flags); + TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2) + return DAG.getNode( + ISD::FSUB, DL, VT, N1, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags); auto isFMulNegTwo = [](SDValue FMul) { if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL) @@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // If 'unsafe math' or reassoc and nsz, fold lots of things. 
// TODO: break out portions of the transformations below for which Unsafe is // considered and which do not require both nsz and reassoc - if ((Options.UnsafeFPMath || + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && AllowNewConst) { // fadd (fadd x, c1), c2 -> fadd x, c1 + c2 @@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Options.UnsafeFPMath || + if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) { return N0; } @@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); } } - if ((Options.UnsafeFPMath || - (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) - && N1.getOpcode() == ISD::FADD) { + if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) || + (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) && + N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y if (N0 == N1->getOperand(0)) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags); @@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } // fold (fsub A, (fneg B)) -> (fadd A, B) - if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize)) - return DAG.getNode(ISD::FADD, DL, VT, N0, - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), Flags); + if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize)) + return DAG.getNode( + ISD::FADD, DL, VT, N0, + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); // FSUB -> FMA combines: if (SDValue Fused = visitFSUBForFMACombine(N)) { @@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } +/// Return true if both inputs are at least as cheap in negated form and at +/// least one input is strictly cheaper in negated form. +bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) { + if (char LHSNeg = + TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize)) + if (char RHSNeg = + TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize)) + // Both negated operands are at least as cheap as their counterparts. + // Check to see if at least one is cheaper negated. 
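// Hypothetical instance: X = (fneg A) negates strictly more cheaply
// (isNegatibleForFree returns 2, since the fneg node simply disappears),
// while a constant Y negates at equal cost (returns 1), so rewriting
// -X op Y as A op (-Y) is profitable overall.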
+ if (LHSNeg == 2 || RHSNeg == 2) + return true; + + return false; +} + SDValue DAGCombiner::visitFMUL(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { !isConstantFPBuildVectorOrConstantFP(N1)) return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); - // fold (fmul A, 1.0) -> A - if (N1CFP && N1CFP->isExactlyValue(1.0)) - return N0; - if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; @@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, N0); - // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. - if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, DL, VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } + // -N0 * -N1 --> N0 * N1 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags); } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) @@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2); } + // (-N0 * -N1) + N2 --> (N0 * N1) + N2 + if (isCheaperToUseNegatedFPOps(N0, N1)) { + SDValue NegN0 = + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags); + } + if (UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) return N2; @@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // If this FDIV is part of a reciprocal square root, it may be folded // into a target-specific square root estimate instruction. if (N1.getOpcode() == ISD::FSQRT) { - if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) { + if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } } else if (N1.getOpcode() == ISD::FP_EXTEND && N1.getOperand(0).getOpcode() == ISD::FSQRT) { if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0), @@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { } // Fold into a reciprocal estimate and multiply instead of a real divide. - if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { - AddToWorklist(RV.getNode()); - return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); - } + if (SDValue RV = BuildDivEstimate(N0, N1, Flags)) + return RV; } // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) - if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options, - ForCodeSize)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options, - ForCodeSize)) { - // Both can be negated for free, check to see if at least one is cheaper - // negated. 
- if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, - GetNegatedExpression(N0, DAG, LegalOperations, - ForCodeSize), - GetNegatedExpression(N1, DAG, LegalOperations, - ForCodeSize), - Flags); - } - } + if (isCheaperToUseNegatedFPOps(N0, N1)) + return DAG.getNode( + ISD::FDIV, SDLoc(N), VT, + TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags); return SDValue(); } @@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { return SDValue(); } -SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { - SDValue N0 = N->getOperand(0); - EVT VT = N->getValueType(0); - EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); - - // fold (fp_round_inreg c1fp) -> c1fp - if (N0CFP && isTypeLegal(EVT)) { - SDLoc DL(N); - SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); - return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); - } - - return SDValue(); -} - SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { if (isConstantFPBuildVectorOrConstantFP(N0)) return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); - if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), - &DAG.getTarget().Options, ForCodeSize)) - return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); + if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize)) + return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize); // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading // constant pool values. @@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) { } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { - if (OptLevel == CodeGenOpt::None || LD->isVolatile()) + if (OptLevel == CodeGenOpt::None || !LD->isSimple()) return SDValue(); SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); - if (!ST || ST->isVolatile()) + // TODO: Relax this restriction for unordered atomics (see D66309) + if (!ST || !ST->isSimple()) return SDValue(); EVT LDType = LD->getValueType(0); @@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). - if (!LD->isVolatile()) { + // TODO: Allow this for unordered atomics (see D66309) + if (LD->isSimple()) { if (N->getValueType(1) == MVT::Other) { // Unindexed loads. if (!N->hasAnyUseOfValue(0)) { @@ -14241,7 +14138,7 @@ struct LoadedSlice { /// Helper structure used to compute the cost of a slice. struct Cost { /// Are we optimizing for code size. - bool ForCodeSize; + bool ForCodeSize = false; /// Various cost. unsigned Loads = 0; @@ -14250,10 +14147,10 @@ struct LoadedSlice { unsigned ZExts = 0; unsigned Shift = 0; - Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {} + explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {} /// Get the cost of one isolated slice. 
- Cost(const LoadedSlice &LS, bool ForCodeSize = false) + Cost(const LoadedSlice &LS, bool ForCodeSize) : ForCodeSize(ForCodeSize), Loads(1) { EVT TruncType = LS.Inst->getValueType(0); EVT LoadedType = LS.getLoadedType(); @@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) { return false; LoadSDNode *LD = cast<LoadSDNode>(N); - if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + if (!LD->isSimple() || !ISD::isNormalLoad(LD) || !LD->getValueType(0).isInteger()) return false; @@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { else if (Chain->getOpcode() == ISD::TokenFactor && SDValue(LD, 1).hasOneUse()) { // LD has only 1 chain use so they are no indirect dependencies. - bool isOk = false; - for (const SDValue &ChainOp : Chain->op_values()) - if (ChainOp.getNode() == LD) { - isOk = true; - break; - } - if (!isOk) + if (!LD->isOperandOf(Chain.getNode())) return Result; } else return Result; // Fail. @@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { /// Check to see if IVal is something that provides a value as specified by /// MaskInfo. If so, replace the specified store with a narrower store of /// truncated IVal. -static SDNode * +static SDValue ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, SDValue IVal, StoreSDNode *St, DAGCombiner *DC) { @@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, // that uses this. If not, this is not a replacement. APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), ByteShift*8, (ByteShift+NumBytes)*8); - if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; + if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue(); // Check that it is legal on the target to do this. It is legal if the new // VT we're shrinking to (i8/i16/i32) is legal or we're still before type - // legalization. - MVT VT = MVT::getIntegerVT(NumBytes*8); + // legalization (and the target doesn't explicitly think this is a bad idea). + MVT VT = MVT::getIntegerVT(NumBytes * 8); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DC->isTypeLegal(VT)) - return nullptr; + return SDValue(); + if (St->getMemOperand() && + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *St->getMemOperand())) + return SDValue(); // Okay, we can do this! Replace the 'St' store with a store of IVal that is // shifted by ByteShift and truncated down to NumBytes. @@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, ++OpsNarrowed; return DAG .getStore(St->getChain(), SDLoc(St), IVal, Ptr, - St->getPointerInfo().getWithOffset(StOffset), NewAlign) - .getNode(); + St->getPointerInfo().getWithOffset(StOffset), NewAlign); } /// Look for sequence of load / op / store where op is one of 'or', 'xor', and @@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, /// or code size. 
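/// Illustrative little-endian example (assumed i32 at %p):
///   x = (and (load i32 %p), 0xFF00FFFF)   ; keep every byte except byte 2
///   (store (or x, y), %p)                 ; y supplies only byte 2
/// shrinks to a one-byte update of the changed byte:
///   (store (trunc (srl y, 16) to i8), %p + 2)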
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { StoreSDNode *ST = cast<StoreSDNode>(N); - if (ST->isVolatile()) + if (!ST->isSimple()) return SDValue(); SDValue Chain = ST->getChain(); @@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { std::pair<unsigned, unsigned> MaskedLoad; MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(1), ST,this)) - return SDValue(NewST, 0); + return NewST; // Or is commutative, so try swapping X and Y. MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); if (MaskedLoad.first) - if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, Value.getOperand(0), ST,this)) - return SDValue(NewST, 0); + return NewST; } if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || @@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!Ld->hasNUsesOfValue(1, 0)) return; - // The memory operands must not be volatile/indexed. - if (Ld->isVolatile() || Ld->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Ld->isSimple() || Ld->isIndexed()) return; } auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr, int64_t &Offset) -> bool { - // The memory operands must not be volatile/indexed. - if (Other->isVolatile() || Other->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!Other->isSimple() || Other->isIndexed()) return false; // Don't mix temporal stores with non-temporal stores. if (St->isNonTemporal() != Other->isNonTemporal()) @@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates( // Loads must only have one use. if (!OtherLd->hasNUsesOfValue(1, 0)) return false; - // The memory operands must not be volatile/indexed. - if (OtherLd->isVolatile() || OtherLd->isIndexed()) + // The memory operands must not be volatile/indexed/atomic. + // TODO: May be able to relax for unordered atomics (see D66309) + if (!OtherLd->isSimple() || + OtherLd->isIndexed()) return false; // Don't mix temporal loads with non-temporal loads. if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal()) @@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates( return (BasePtr.equalBaseIndex(Ptr, DAG, Offset)); }; + // Check if the pair of StoreNode and the RootNode already bail out many + // times which is over the limit in dependence check. + auto OverLimitInDependenceCheck = [&](SDNode *StoreNode, + SDNode *RootNode) -> bool { + auto RootCount = StoreRootCountMap.find(StoreNode); + if (RootCount != StoreRootCountMap.end() && + RootCount->second.first == RootNode && + RootCount->second.second > StoreMergeDependenceLimit) + return true; + return false; + }; + // We looking for a root node which is an ancestor to all mergable // stores. We search up through a load, to our root and then down // through all children. 
For instance we will find Store{1,2,3} if @@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } else @@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates( if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { BaseIndexOffset Ptr; int64_t PtrDiff; - if (CandidateMatch(OtherST, Ptr, PtrDiff)) + if (CandidateMatch(OtherST, Ptr, PtrDiff) && + !OverLimitInDependenceCheck(OtherST, RootNode)) StoreNodes.push_back(MemOpLink(OtherST, PtrDiff)); } } @@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies( // Search through DAG. We can stop early if we find a store node. for (unsigned i = 0; i < NumStores; ++i) if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist, - Max)) + Max)) { + // If the searching bail out, record the StoreNode and RootNode in the + // StoreRootCountMap. If we have seen the pair many times over a limit, + // we won't add the StoreNode into StoreNodes set again. + if (Visited.size() >= Max) { + auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode]; + if (RootCount.first == RootNode) + RootCount.second++; + else + RootCount = {RootNode, 1}; + } return false; + } return true; } bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) return false; EVT MemVT = St->getMemoryVT(); @@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) { bool RV = false; while (StoreNodes.size() > 1) { - unsigned StartIdx = 0; + size_t StartIdx = 0; while ((StartIdx + 1 < StoreNodes.size()) && StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes != StoreNodes[StartIdx + 1].OffsetFromBase) @@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { case MVT::ppcf128: return SDValue(); case MVT::f32: - if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { ; Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). @@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { return SDValue(); case MVT::f64: if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && - !ST->isVolatile()) || + ST->isSimple()) || TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { ; Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { Ptr, ST->getMemOperand()); } - if (!ST->isVolatile() && + if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the @@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // memory accesses. We don't care if the original type was legal or not // as we assume software couldn't rely on the number of accesses of an // illegal type. 
- if (((!LegalOperations && !ST->isVolatile()) || + // TODO: May be able to relax for unordered atomics (see D66309) + if (((!LegalOperations && ST->isSimple()) || TLI.isOperationLegal(ISD::STORE, SVT)) && TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, DAG, *ST->getMemOperand())) { @@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // See if we can simplify the input to this truncstore with knowledge that // only the low bits are being used. For example: // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" - SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits); AddToWorklist(Value.getNode()); - if (Shorter) + if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), ST->getMemOperand()); @@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // If this is a load followed by a store to the same location, then the store // is dead/noop. + // TODO: Can relax for unordered atomics (see D66309) if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && - ST->isUnindexed() && !ST->isVolatile() && + ST->isUnindexed() && ST->isSimple() && // There can't be any side effects between the load and store, such as // a call or store. Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { @@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { } } + // TODO: Can relax for unordered atomics (see D66309) if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { - if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() && - !ST1->isVolatile()) { + if (ST->isUnindexed() && ST->isSimple() && + ST1->isUnindexed() && ST1->isSimple()) { if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT()) { // If this is a store followed by a store with the same value to the @@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { break; case ISD::STORE: { StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain); - if (ST->isVolatile() || ST->isIndexed()) + // TODO: Can relax for unordered atomics (see D66309) + if (!ST->isSimple() || ST->isIndexed()) continue; const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG); // If we store purely within object bounds just before its lifetime ends, @@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { if (OptLevel == CodeGenOpt::None) return SDValue(); + // Can't change the number of memory accesses for a volatile store or break + // atomicity for an atomic one. + if (!ST->isSimple()) + return SDValue(); + SDValue Val = ST->getValue(); SDLoc DL(ST); @@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { } /// Convert a disguised subvector insertion into a shuffle: -/// insert_vector_elt V, (bitcast X from vector type), IdxC --> -/// bitcast(shuffle (bitcast V), (extended X), Mask) -/// Note: We do not use an insert_subvector node because that requires a legal -/// subvector type. 
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); + + // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex) + // --> (vector_shuffle X, Y) + if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && + InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(InsertVal.getOperand(1))) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode()); + ArrayRef<int> Mask = SVN->getMask(); + + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. + int XOffset = -1; + if (InsertVal.getOperand(0) == X) { + XOffset = 0; + } else if (InsertVal.getOperand(0) == Y) { + XOffset = X.getValueType().getVectorNumElements(); + } + + if (XOffset != -1) { + SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); + + auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); + NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue(); + assert(NewMask[InsIndex] < + (int)(2 * Vec.getValueType().getVectorNumElements()) && + NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); + + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, + Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; + } + } + + // insert_vector_elt V, (bitcast X from vector type), IdxC --> + // bitcast(shuffle (bitcast V), (extended X), Mask) + // Note: We do not use an insert_subvector node because that requires a + // legal subvector type. if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); @@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { - assert(!OriginalLoad->isVolatile()); + assert(OriginalLoad->isSimple()); EVT ResultVT = EVE->getValueType(0); EVT VecEltVT = InVecVT.getVectorElementType(); @@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; SDValue To[] = { Load, Chain }; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Make sure to revisit this node to clean it up; it will usually be dead. + AddToWorklist(EVE); // Since we're explicitly calling ReplaceAllUses, add the new node to the // worklist explicitly as well. - AddToWorklist(Load.getNode()); AddUsersToWorklist(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorklist(EVE); + AddToWorklist(Load.getNode()); ++OpsNarrowed; return SDValue(EVE, 0); } @@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { ISD::isNormalLoad(VecOp.getNode()) && !Index->hasPredecessor(VecOp.getNode())) { auto *VecLoad = dyn_cast<LoadSDNode>(VecOp); - if (VecLoad && !VecLoad->isVolatile()) + if (VecLoad && VecLoad->isSimple()) return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad); } @@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Make sure we found a non-volatile load and the extractelement is // the only use. 
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple()) return SDValue(); // If Idx was -1 above, Elt is going to be -1, so just return undef. @@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) { // the shuffle mask with -1. } - // Turn this into a shuffle with zero if that's legal. - EVT VecVT = Extract.getOperand(0).getValueType(); - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT)) - return SDValue(); - // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... --> // bitcast (shuffle V, ZeroVec, VectorMask) SDLoc DL(BV); + EVT VecVT = Extract.getOperand(0).getValueType(); SDValue ZeroVec = DAG.getConstant(0, DL, VecVT); - SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec, - ShufMask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0), + ZeroVec, ShufMask, DAG); + if (!Shuf) + return SDValue(); return DAG.getBitcast(VT, Shuf); } @@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } } + // A splat of a single element is a SPLAT_VECTOR if supported on the target. + if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand) + if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) { + assert(!V.isUndef() && "Splat of undef should have been handled earlier"); + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V); + } + // Check if we can express BUILD VECTOR via subvector extract. if (!LegalTypes && (N->getNumOperands() > 1)) { SDValue Op0 = N->getOperand(0); @@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { } } - if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), - DAG.getBitcast(VT, SV1), Mask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask, DAG); } SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { @@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SDValue In = N->getOperand(0); assert(In.getValueType().isVector() && "Must concat vectors"); + // If the input is a concat_vectors, just make a larger concat by padding + // with smaller undefs. + if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) { + unsigned NumOps = N->getNumOperands() * In.getNumOperands(); + SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end()); + Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType())); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); + } + SDValue Scalar = peekThroughOneUseBitcasts(In); // concat_vectors(scalar_to_vector(scalar), undef) -> @@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return SDValue(); } +// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find +// if the subvector can be sourced for free. 
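// For example, with SubVT = v4i32 (values A, B, W, X are hypothetical):
//   getSubVectorSrc((concat_vectors A, B), Index=4, v4i32) == B
//   getSubVectorSrc((insert_subvector W, X, Idx), Idx, v4i32) == X
// which lets the caller below fold
//   extract (binop (ins ?, X, Idx), (ins ?, Y, Idx)), Idx --> binop X, Y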
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) { + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && + V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) { + return V.getOperand(1); + } + auto *IndexC = dyn_cast<ConstantSDNode>(Index); + if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && + V.getOperand(0).getValueType() == SubVT && + (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) { + uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements(); + return V.getOperand(SubIdx); + } + return SDValue(); +} + static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1) return SDValue(); + EVT VecVT = BinOp.getValueType(); SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); - SDValue Index = Extract->getOperand(1); - EVT VT = Extract->getValueType(0); + if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType()) + return SDValue(); - // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find - // if the source subvector is the same type as the one being extracted. - auto GetSubVector = [VT, Index](SDValue V) -> SDValue { - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && - V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) { - return V.getOperand(1); - } - auto *IndexC = dyn_cast<ConstantSDNode>(Index); - if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS && - V.getOperand(0).getValueType() == VT && - (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) { - uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements(); - return V.getOperand(SubIdx); - } + SDValue Index = Extract->getOperand(1); + EVT SubVT = Extract->getValueType(0); + if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT)) return SDValue(); - }; - SDValue Sub0 = GetSubVector(Bop0); - SDValue Sub1 = GetSubVector(Bop1); + + SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT); + SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT); // TODO: We could handle the case where only 1 operand is being inserted by // creating an extract of the other operand, but that requires checking // number of uses and/or costs. - if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT)) + if (!Sub0 || !Sub1) return SDValue(); // We are inserting both operands of the wide binop only to extract back // to the narrow vector size. Eliminate all of the insert/extract: // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y - return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1, + return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1, BinOp->getFlags()); } @@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || + !ExtIdx) return SDValue(); // Allow targets to opt-out. @@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // build_vector. 
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); - if (TLI.isExtractVecEltCheap(VT, SplatIndex) && + if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) && TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) @@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SV1 = DAG.getUNDEF(VT); // Avoid introducing shuffles with illegal mask. - if (!TLI.isShuffleMaskLegal(Mask, VT)) { - ShuffleVectorSDNode::commuteMask(Mask); - - if (!TLI.isShuffleMaskLegal(Mask, VT)) - return SDValue(); - - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) - std::swap(SV0, SV1); - } - // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) - return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask); + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG); } if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) @@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); int Elt = C0->getZExtValue(); NewMask[0] = Elt; - SDValue Val; // If we have an implict truncate do truncate here as long as it's legal. // if it's not legal, this should if (VT.getScalarType() != InVal.getValueType() && InVal.getValueType().isScalarInteger() && isTypeLegal(VT.getScalarType())) { - Val = + SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); } if (VT.getScalarType() == InVecT.getScalarType() && - VT.getVectorNumElements() <= InVecT.getVectorNumElements() && - TLI.isShuffleMaskLegal(NewMask, VT)) { - Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec, - DAG.getUNDEF(InVecT), NewMask); - // If the initial vector is the correct size this shuffle is a - // valid result. - if (VT == InVecT) - return Val; - // If not we must truncate the vector. - if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); - SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); - EVT SubVT = - EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), - VT.getVectorNumElements()); - Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val, - ZeroIdx); - return Val; + VT.getVectorNumElements() <= InVecT.getVectorNumElements()) { + SDValue LegalShuffle = + TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec, + DAG.getUNDEF(InVecT), NewMask, DAG); + if (LegalShuffle) { + // If the initial vector is the correct size this shuffle is a + // valid result. + if (VT == InVecT) + return LegalShuffle; + // If not we must truncate the vector. 
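// Worked instance (assumed types): for
//   (v4i32 scalar_to_vector (extract_vector_elt (v8i32 V), 2))
// the legal shuffle of V with mask <2,-1,-1,-1,-1,-1,-1,-1> is v8i32, so an
// extract_subvector at index 0 narrows it to the v4i32 result.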
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { + MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy); + EVT SubVT = + EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(), + VT.getVectorNumElements()); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, + LegalShuffle, ZeroIdx); + } } } } @@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } + // Make sure all but the first op are undef or constant. + auto ConcatWithConstantOrUndef = [](SDValue Concat) { + return Concat.getOpcode() == ISD::CONCAT_VECTORS && + std::all_of(std::next(Concat->op_begin()), Concat->op_end(), + [](const SDValue &Op) { + return Op.isUndef() || + ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); + }); + }; + + // The following pattern is likely to emerge with vector reduction ops. Moving + // the binary operation ahead of the concat may allow using a narrower vector + // instruction that has better performance than the wide version of the op: + // VBinOp (concat X, undef/constant), (concat Y, undef/constant) --> + // concat (VBinOp X, Y), VecC + if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) && + (LHS.hasOneUse() || RHS.hasOneUse())) { + EVT NarrowVT = LHS.getOperand(0).getValueType(); + if (NarrowVT == RHS.getOperand(0).getValueType() && + TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { + SDLoc DL(N); + unsigned NumOperands = LHS.getNumOperands(); + SmallVector<SDValue, 4> ConcatOps; + for (unsigned i = 0; i != NumOperands; ++i) { + // This constant fold for operands 1 and up. + ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i), + RHS.getOperand(i))); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } + } + if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) return V; @@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // Token chains must be identical. if (LHS.getOperand(0) != RHS.getOperand(0) || // Do not let this transformation reduce the number of volatile loads. - LLD->isVolatile() || RLD->isVolatile() || + // Be conservative for atomics for the moment + // TODO: This does appear to be legal for unordered atomics (see D66309) + !LLD->isSimple() || !RLD->isSimple() || // FIXME: If either is a pre/post inc/dec load, // we'd need to split out the address adjustment. LLD->isIndexed() || RLD->isIndexed() || @@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset( const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC) { - if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint())) + if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType())) return SDValue(); // If we are before legalize types, we want the other legalization to happen @@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // when the condition can be materialized as an all-ones register. Any // single bit-test can be materialized as an all-ones register with // shift-left and shift-right-arith. + // TODO: The operation legality checks could be loosened to include "custom", + // but that may cause regressions for targets that do not have shift + // instructions. 
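// Worked i32 instance (assumed operands) of the bit-test fold below:
//   select_cc seteq (and X, 4), 0, 0, A
//     --> and (sra (shl X, 29), 31), A
// Bit 2 is shifted onto the sign bit, then smeared by the arithmetic shift
// into an all-ones (bit set) or all-zero (bit clear) mask for A.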
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && - N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) && + TLI.isOperationLegal(ISD::SHL, VT) && + TLI.isOperationLegal(ISD::SRA, VT)) { SDValue AndLHS = N0->getOperand(0); auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { @@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] -SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { +/// For the last iteration, put numerator N into it to gain more precision: +/// Result = N X_i + X_i (N - N A X_i) +SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, + SDNodeFlags Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); @@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) { if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { AddToWorklist(Est.getNode()); + SDLoc DL(Op); if (Iterations) { - SDLoc DL(Op); SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); - // Newton iterations: Est = Est + Est (1 - Arg * Est) + // Newton iterations: Est = Est + Est (N - Arg * Est) + // If this is the last iteration, also multiply by the numerator. for (int i = 0; i < Iterations; ++i) { - SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); + SDValue MulEst = Est; + + if (i == Iterations - 1) { + MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags); + AddToWorklist(MulEst.getNode()); + } + + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags); AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); + NewEst = DAG.getNode(ISD::FSUB, DL, VT, + (i == Iterations - 1 ? N : FPOne), NewEst, Flags); AddToWorklist(NewEst.getNode()); NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); + Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags); AddToWorklist(Est.getNode()); } + } else { + // If no iterations are available, multiply with N. + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags); + AddToWorklist(Est.getNode()); } + return Est; } @@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est, // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that // this entire sequence requires only one FP constant. SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); - AddToWorklist(HalfArg.getNode()); - HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); - AddToWorklist(HalfArg.getNode()); // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); - AddToWorklist(NewEst.getNode()); - Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); - AddToWorklist(Est.getNode()); } // If non-reciprocal square root is requested, multiply the result by Arg. 
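// Scalar model of the refinement loop above (illustrative only; the real
// code builds SDNodes):
//   float HalfArg = 1.5f * A - A;            // 0.5 * A from one constant
//   for (unsigned i = 0; i < Iterations; ++i)
//     E = E * (1.5f - HalfArg * E * E);      // E converges to 1/sqrt(A)
// For a non-reciprocal square root, the multiply below gives A * E ~= sqrt(A).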
- if (!Reciprocal) { + if (!Reciprocal) Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); - AddToWorklist(Est.getNode()); - } return Est; } @@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // E = (E * -0.5) * ((A * E) * E + -3.0) for (unsigned i = 0; i < Iterations; ++i) { SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags); - AddToWorklist(AE.getNode()); - SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags); - AddToWorklist(AEE.getNode()); - SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags); - AddToWorklist(RHS.getNode()); // When calculating a square root at the last iteration build: // S = ((A * E) * -0.5) * ((A * E) * E + -3.0) @@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est, // SQRT: LHS = (A * E) * -0.5 LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags); } - AddToWorklist(LHS.getNode()); Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags); - AddToWorklist(Est.getNode()); } return Est; @@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est); - AddToWorklist(Fabs.getNode()); - AddToWorklist(IsDenorm.getNode()); - AddToWorklist(Est.getNode()); } else { // X == 0.0 ? 0.0 : Est SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est); - AddToWorklist(IsZero.getNode()); - AddToWorklist(Est.getNode()); } } } @@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; + bool IsAtomic; SDValue BasePtr; int64_t Offset; Optional<int64_t> NumBytes; @@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { : (LSN->getAddressingMode() == ISD::PRE_DEC) ? -1 * C->getSExtValue() : 0; - return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/, + return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), + Offset /*base offset*/, Optional<int64_t>(LSN->getMemoryVT().getStoreSize()), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) - return {false /*isVolatile*/, LN->getOperand(1), + return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, (LN->hasOffset()) ? Optional<int64_t>(LN->getSize()) : Optional<int64_t>(), (MachineMemOperand *)nullptr}; // Default. - return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/, + return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), + (int64_t)0 /*offset*/, Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr}; }; @@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { if (MUC0.IsVolatile && MUC1.IsVolatile) return true; + // Be conservative about atomics for the moment + // TODO: This is way overconservative for unordered atomics (see D66309) + if (MUC0.IsAtomic && MUC1.IsAtomic) + return true; + if (MUC0.MMO && MUC1.MMO) { if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) || (MUC1.MMO->isInvariant() && MUC0.MMO->isStore())) @@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, SmallPtrSet<SDNode *, 16> Visited; // Visited node set. // Get alias information for node. 
- const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile(); + // TODO: relax aliasing for unordered atomics (see D66309) + const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple(); // Starting off. Chains.push_back(OriginalChain); @@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LOAD: case ISD::STORE: { // Get alias information for C. + // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && - !cast<LSBaseSDNode>(C.getNode())->isVolatile(); + cast<LSBaseSDNode>(C.getNode())->isSimple(); if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); @@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // If the chain has more than one use, then we can't reorder the mem ops. if (!SDValue(Chain, 0)->hasOneUse()) break; - if (Chain->isVolatile() || Chain->isIndexed()) + // TODO: Relax for unordered atomics (see D66309) + if (!Chain->isSimple() || Chain->isIndexed()) break; // Find the base pointer and offset for this memory node. @@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps); CombineTo(St, TF); - AddToWorklist(STChain); - // Add TF operands worklist in reverse order. - for (auto I = TF->getNumOperands(); I;) - AddToWorklist(TF->getOperand(--I).getNode()); + // Add TF and its operands to the worklist. AddToWorklist(TF.getNode()); + for (const SDValue &Op : TF->ops()) + AddToWorklist(Op.getNode()); + AddToWorklist(STChain); return true; } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 22c23ba877e8..6d7260d7aee5 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -174,7 +174,7 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) { if (RegDef) return 0; RegDef = MO.getReg(); - } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + } else if (Register::isVirtualRegister(MO.getReg())) { // This is another use of a vreg. Don't try to sink it. return 0; } @@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { if (!FrameAlign) FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); Flags.setByValSize(FrameSize); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.IsNest) Flags.setNest(); if (NeedsRegBlock) Flags.setInConsecutiveRegs(); - unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); - Flags.setOrigAlign(OriginalAlignment); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); CLI.OutVals.push_back(Arg.Val); CLI.OutFlags.push_back(Flags); @@ -1237,8 +1236,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); // Set labels for heapallocsite call. 
- if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) { - MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); + if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) { + const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite"); MF->addCodeViewHeapAllocSite(CLI.Call, MD); } @@ -1303,6 +1302,7 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_HasSideEffects; if (IA->isAlignStack()) ExtraInfo |= InlineAsm::Extra_IsAlignStack; + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::INLINEASM)) @@ -1388,9 +1388,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { "Expected inlined-at fields to agree"); // A dbg.declare describes the address of a source variable, so lower it // into an indirect DBG_VALUE. + auto *Expr = DI->getExpression(); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, - *Op, DI->getVariable(), DI->getExpression()); + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false, + *Op, DI->getVariable(), Expr); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. @@ -1414,19 +1416,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) - .addImm(0U) + .addReg(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (unsigned Reg = lookUpRegForValue(V)) { @@ -1453,24 +1455,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; } - case Intrinsic::objectsize: { - ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); - unsigned long long Res = CI->isZero() ? -1ULL : 0; - Constant *ResCI = ConstantInt::get(II->getType(), Res); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } - case Intrinsic::is_constant: { - Constant *ResCI = ConstantInt::get(II->getType(), 0); - unsigned ResultReg = getRegForValue(ResCI); - if (!ResultReg) - return false; - updateValueMap(II, ResultReg); - return true; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); + + case Intrinsic::is_constant: + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::launder_invariant_group: case Intrinsic::strip_invariant_group: case Intrinsic::expect: { @@ -1677,11 +1667,11 @@ bool FastISel::selectInstruction(const Instruction *I) { /// (fall-through) successor, and update the CFG. 
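The one-line inline-asm fix above restores the asm dialect when FastISel emits INLINEASM: getDialect() is 0 for AT&T and 1 for Intel, so the multiply either leaves the Extra_AsmDialect bit clear or sets it. Hypothetical helper condensing the surrounding flag setup (illustrative name; assumes only the documented InlineAsm API):

#include "llvm/IR/InlineAsm.h"
using namespace llvm;

static unsigned encodeAsmExtraInfo(const InlineAsm *IA) {
  unsigned ExtraInfo = 0;
  if (IA->hasSideEffects())
    ExtraInfo |= InlineAsm::Extra_HasSideEffects;
  if (IA->isAlignStack())
    ExtraInfo |= InlineAsm::Extra_IsAlignStack;
  // AD_ATT == 0 leaves the dialect bit clear; AD_Intel == 1 sets it.
  ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
  return ExtraInfo;
}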
void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, const DebugLoc &DbgLoc) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { - // For more accurate line information if this is the only instruction - // in the block then emit it, otherwise we have the unconditional - // fall-through case, which needs no instructions. + // For more accurate line information if this is the only non-debug + // instruction in the block then emit it, otherwise we have the + // unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr, @@ -2028,7 +2018,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, unsigned OpNum) { - if (TargetRegisterInfo::isVirtualRegister(Op)) { + if (Register::isVirtualRegister(Op)) { const TargetRegisterClass *RegClass = TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); if (!MRI.constrainRegClass(Op, RegClass)) { @@ -2236,7 +2226,7 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - assert(TargetRegisterInfo::isVirtualRegister(Op0) && + assert(Register::isVirtualRegister(Op0) && "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); @@ -2417,10 +2407,9 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { } else return nullptr; - bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; - bool IsDereferenceable = - I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr; + bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal); + bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load); + bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); AAMDNodes AAInfo; diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 8b1759246b76..cf6711adad48 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -424,7 +425,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { unsigned BitWidth = IntVT.getSizeInBits(); unsigned DestReg = ValueMap[PN]; - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) return; LiveOutRegInfo.grow(DestReg); LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; @@ -445,7 +446,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if 
(!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } @@ -480,7 +481,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when " "its CopyToReg node was created."); unsigned SrcReg = ValueMap[V]; - if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (!Register::isVirtualRegister(SrcReg)) { DestLOI.IsValid = false; return; } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 9bc07d35dfc5..c5095995ec2e 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -71,7 +71,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) - if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + if (Register::isPhysicalRegister(RN->getReg())) continue; NumImpUses = N - I; break; @@ -86,7 +86,7 @@ void InstrEmitter:: EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { unsigned VRBase = 0; - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + if (Register::isVirtualRegister(SrcReg)) { // Just use the input register directly! SDValue Op(Node, ResNo); if (IsClone) @@ -114,7 +114,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == ResNo) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; Match = false; } else if (DestReg != SrcReg) @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); + TRI->getCommonSubClass(UseRC, RC); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) @@ -219,7 +219,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); - assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + assert(Register::isPhysicalRegister(VRBase)); MIB.addReg(VRBase, RegState::Define); } @@ -229,7 +229,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; @@ -272,7 +272,7 @@ unsigned InstrEmitter::getVR(SDValue Op, // does not include operand register class info. 
const TargetRegisterClass *RC = TLI->getRegClassFor( Op.getSimpleValueType(), Op.getNode()->isDivergent()); - unsigned VReg = MRI->createVirtualRegister(RC); + Register VReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; @@ -319,7 +319,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, if (!ConstrainedRC) { OpRC = TRI->getAllocatableClass(OpRC); assert(OpRC && "Constraints cannot be fulfilled for allocation"); - unsigned NewVReg = MRI->createVirtualRegister(OpRC); + Register NewVReg = MRI->createVirtualRegister(OpRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -385,9 +385,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, (IIRC && TRI->isDivergentRegClass(IIRC))) : nullptr; - if (OpRC && IIRC && OpRC != IIRC && - TargetRegisterInfo::isVirtualRegister(VReg)) { - unsigned NewVReg = MRI->createVirtualRegister(IIRC); + if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { + Register NewVReg = MRI->createVirtualRegister(IIRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; @@ -465,7 +464,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, // register instead. RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx); assert(RC && "No legal register class for VT supports that SubIdx"); - unsigned NewReg = MRI->createVirtualRegister(RC); + Register NewReg = MRI->createVirtualRegister(RC); BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) .addReg(VReg); return NewReg; @@ -485,7 +484,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node) { unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + if (Register::isVirtualRegister(DestReg)) { VRBase = DestReg; break; } @@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, unsigned Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); - if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (R && Register::isPhysicalRegister(R->getReg())) { Reg = R->getReg(); DefMI = nullptr; } else { @@ -529,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // Reg may not support a SubIdx sub-register, and we may need to // constrain its register class or issue a COPY to a compatible register // class. 
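The FunctionLoweringInfo and InstrEmitter hunks in this range are a mechanical migration from the static TargetRegisterInfo::is{Virtual,Physical}Register helpers and raw unsigned vregs to the llvm::Register wrapper. Sketch of the two spellings the diff uses (illustrative function; cf. llvm/CodeGen/Register.h):

#include "llvm/CodeGen/Register.h"
using llvm::Register;

static bool isVirtOrPhys(Register Reg) {
  // Register converts to/from the old raw unsigned, so both spellings
  // interoperate during the migration.
  bool Virt = Register::isVirtualRegister(Reg); // static form, most hunks
  return Virt || Reg.isPhysical();              // member form, EmitMachineNode
}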
- if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) Reg = ConstrainForSubReg(Reg, SubIdx, Node->getOperand(0).getSimpleValueType(), Node->isDivergent(), Node->getDebugLoc()); @@ -541,7 +540,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, MachineInstrBuilder CopyMI = BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), VRBase); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) CopyMI.addReg(Reg, 0, SubIdx); else CopyMI.addReg(TRI->getSubReg(Reg, SubIdx)); @@ -614,7 +613,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); - unsigned NewVReg = MRI->createVirtualRegister(DstRC); + Register NewVReg = MRI->createVirtualRegister(DstRC); BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -631,7 +630,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, bool IsClone, bool IsCloned) { unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); - unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); + Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); @@ -649,7 +648,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. - if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + if (!R || !Register::isPhysicalRegister(R->getReg())) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -678,7 +677,7 @@ MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, unsigned> &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + const DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -702,12 +701,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // EmitTargetCodeForFrameDebugValue is responsible for allocation. auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) .addFrameIndex(SD->getFrameIx()); + if (SD->isIndirect()) - // Push [fi + 0] onto the DIExpression stack. - FrameMI.addImm(0); - else - // Push fi onto the DIExpression stack. - FrameMI.addReg(0); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + FrameMI.addReg(0); return FrameMI.addMetadata(Var).addMetadata(Expr); } // Otherwise, we're going to create an instruction here. @@ -753,9 +751,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, // Indirect addressing is indicated by an Imm as the second parameter. 
if (SD->isIndirect()) - MIB.addImm(0U); - else - MIB.addReg(0U, RegState::Debug); + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); + + MIB.addReg(0U, RegState::Debug); MIB.addMetadata(Var); MIB.addMetadata(Expr); @@ -928,12 +926,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // // Collect all the used physreg defs, and make sure that any unused physreg // defs are marked as dead. - SmallVector<unsigned, 8> UsedRegs; + SmallVector<Register, 8> UsedRegs; // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = NumDefs; i < NumResults; ++i) { - unsigned Reg = II.getImplicitDefs()[i - NumDefs]; + Register Reg = II.getImplicitDefs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. @@ -960,8 +958,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // direct RegisterSDNode operands. for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { - unsigned Reg = R->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + Register Reg = R->getReg(); + if (Reg.isPhysical()) UsedRegs.push_back(Reg); } } @@ -995,8 +993,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case ISD::CopyToReg: { unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); - if (TargetRegisterInfo::isVirtualRegister(DestReg) && - SrcVal.isMachineOpcode() && + if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Instead building a COPY to that vreg destination, build an // IMPLICIT_DEF instruction instead. @@ -1093,16 +1090,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. - MIB.addReg(Reg, RegState::Define | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | + getImplRegState(Register::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | - getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, + RegState::Define | RegState::EarlyClobber | + getImplRegState(Register::isPhysicalRegister(Reg))); ECRegs.push_back(Reg); } break; @@ -1136,7 +1135,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // then remove the early-clobber flag. 
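Note that the context comment retained above ("Indirect addressing is indicated by an Imm as the second parameter.") now describes the old convention: after this change both EmitDbgValue paths fold the indirection into the DIExpression and always emit the $noreg placeholder as the second operand. Hypothetical helper condensing the new shape (illustrative name; APIs as used in the hunks):

#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

static const MachineInstrBuilder &
finishDbgValue(const MachineInstrBuilder &MIB, bool IsIndirect,
               const MDNode *Var, const DIExpression *Expr) {
  if (IsIndirect) // fold the load from the variable's address into the expr
    Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
  // The second operand is now always the $noreg placeholder.
  return MIB.addReg(0U, RegState::Debug).addMetadata(Var).addMetadata(Expr);
}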
for (unsigned Reg : ECRegs) { if (MIB->readsRegister(Reg, TRI)) { - MachineOperand *MO = + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, false, TRI); assert(MO && "No def operand for clobbered register?"); MO->setIsEarlyClobber(false); diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index bf817f00f83d..f9fdf525240f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -161,6 +162,7 @@ private: SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, const SDLoc &dl, SDValue ChainIn); SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSPLAT_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -236,6 +238,16 @@ public: } ReplacedNode(Old); } + + void ReplaceNodeWithValue(SDValue Old, SDValue New) { + LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesOfValueWith(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } }; } // end anonymous namespace @@ -493,8 +505,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // expand it. EVT MemVT = ST->getMemoryVT(); const DataLayout &DL = DAG.getDataLayout(); - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n"); SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); @@ -608,8 +620,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { EVT MemVT = ST->getMemoryVT(); // If this is an unaligned store and the target doesn't support it, // expand it. - if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *ST->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *ST->getMemOperand())) { SDValue Result = TLI.expandUnalignedStore(ST, DAG); ReplaceNode(SDValue(ST, 0), Result); } @@ -669,8 +681,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { const DataLayout &DL = DAG.getDataLayout(); // If this is an unaligned load and the target doesn't support it, // expand it. 
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, - *LD->getMemOperand())) { + if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT, + *LD->getMemOperand())) { std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG); } break; @@ -894,11 +906,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { if (SrcVT.getScalarType() == MVT::f16) { EVT ISrcVT = SrcVT.changeTypeToInteger(); EVT IDestVT = DestVT.changeTypeToInteger(); - EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); - SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, - Chain, Ptr, ISrcVT, - LD->getMemOperand()); + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain, + Ptr, ISrcVT, LD->getMemOperand()); Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); Chain = Result.getValue(1); break; @@ -959,15 +970,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal || - TLI.isTypeLegal(Node->getValueType(i))) && + assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == TargetLowering::TypeLegal || - TLI.isTypeLegal(Op.getValueType()) || Op.getOpcode() == ISD::TargetConstant || Op.getOpcode() == ISD::Register) && "Unexpected illegal type!"); @@ -1004,7 +1013,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); break; - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: { EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); Action = TLI.getOperationAction(Node->getOpcode(), InnerType); @@ -1097,38 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } break; - case ISD::STRICT_FADD: - case ISD::STRICT_FSUB: - case ISD::STRICT_FMUL: - case ISD::STRICT_FDIV: - case ISD::STRICT_FREM: - case ISD::STRICT_FSQRT: - case ISD::STRICT_FMA: - case ISD::STRICT_FPOW: - case ISD::STRICT_FPOWI: - case ISD::STRICT_FSIN: - case ISD::STRICT_FCOS: - case ISD::STRICT_FEXP: - case ISD::STRICT_FEXP2: - case ISD::STRICT_FLOG: - case ISD::STRICT_FLOG10: - case ISD::STRICT_FLOG2: - case ISD::STRICT_FRINT: - case ISD::STRICT_FNEARBYINT: - case ISD::STRICT_FMAXNUM: - case ISD::STRICT_FMINNUM: - case ISD::STRICT_FCEIL: - case ISD::STRICT_FFLOOR: - case ISD::STRICT_FROUND: - case ISD::STRICT_FTRUNC: - case ISD::STRICT_FP_ROUND: - case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These pseudo-ops are the same as the other STRICT_ ops except + // they are registered with setOperationAction() using the input type + // instead of the output type. 
Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Node->getOperand(1).getValueType()); break; case ISD::SADDSAT: case ISD::UADDSAT: @@ -1139,7 +1124,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); @@ -1650,7 +1636,6 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; - bool NeedSwap = false; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); case TargetLowering::Legal: @@ -1664,6 +1649,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, return true; } // Swapping operands didn't work. Try inverting the condition. + bool NeedSwap = false; InvCC = getSetCCInverse(CCCode, OpVT.isInteger()); if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) { // If inverting the condition is not enough, try swapping operands @@ -2021,6 +2007,14 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { return ExpandVectorBuildThroughStack(Node); } +SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue SplatVal = Node->getOperand(0); + + return DAG.getSplatBuildVector(VT, DL, SplatVal); +} + // Expand a node into a call to a libcall. If the result value // does not fit into a register, return the lo part and set the hi part to the // by-reg argument. If it does fit into a single register, return the result @@ -2074,12 +2068,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); if (!CallInfo.second.getNode()) { - LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump()); + LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG)); // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); } - LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump()); + LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG)); return CallInfo.first; } @@ -2167,6 +2161,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { + if (Node->isStrictFPOpcode()) + Node = DAG.mutateStrictFPToFP(Node); + RTLIB::Libcall LC; switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) { default: llvm_unreachable("Unexpected request for libcall!"); @@ -2815,6 +2812,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } case ISD::STRICT_FP_ROUND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getValueType(0), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2829,6 +2832,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; case ISD::STRICT_FP_EXTEND: + // This expansion does not honor the "strict" properties anyway, + // so prefer falling back to the non-strict operation if legal. 
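For the ExpandSPLAT_VECTOR hunk above: SelectionDAG::getSplatBuildVector simply replicates the scalar into a BUILD_VECTOR, which is only meaningful for fixed-width vector types. Spelled out (a sketch; the real helper handles a few more details, so treat this as an approximation):

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

static SDValue splatAsBuildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                                  SDValue SplatVal) {
  // N copies of the scalar, one per vector lane.
  SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), SplatVal);
  return DAG.getBuildVector(VT, DL, Ops);
}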
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + break; Tmp1 = EmitStackConvert(Node->getOperand(1), Node->getOperand(1).getValueType(), Node->getValueType(0), dl, Node->getOperand(0)); @@ -2873,19 +2882,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::FP_ROUND_INREG: { - // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targeting a temporary location (a stack slot). - - // NOTE: there is a choice here between constantly creating new stack - // slots and always reusing the same one. We currently always create - // new ones, as reuse may inhibit scheduling. - EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); - Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, - Node->getValueType(0), dl); - Results.push_back(Tmp1); - break; - } case ISD::UINT_TO_FP: if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) { Results.push_back(Tmp1); @@ -2901,33 +2897,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) Results.push_back(Tmp1); break; + case ISD::STRICT_FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) { + ReplaceNode(Node, Tmp1.getNode()); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n"); + return true; + } + break; case ISD::FP_TO_UINT: - if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG)) + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) Results.push_back(Tmp1); break; - case ISD::LROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, - RTLIB::LROUND_F64, RTLIB::LROUND_F80, - RTLIB::LROUND_F128, - RTLIB::LROUND_PPCF128)); - break; - case ISD::LLROUND: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, - RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, - RTLIB::LLROUND_F128, - RTLIB::LLROUND_PPCF128)); - break; - case ISD::LRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, - RTLIB::LRINT_F64, RTLIB::LRINT_F80, - RTLIB::LRINT_F128, - RTLIB::LRINT_PPCF128)); - break; - case ISD::LLRINT: - Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, - RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, - RTLIB::LLRINT_F128, - RTLIB::LLRINT_PPCF128)); + case ISD::STRICT_FP_TO_UINT: + if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) { + // Relink the chain. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node,1), Tmp2); + // Replace the new UINT result. + ReplaceNodeWithValue(SDValue(Node, 0), Tmp1); + LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n"); + return true; + } break; case ISD::VAARG: Results.push_back(DAG.expandVAArg(Node)); @@ -3348,6 +3337,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: Results.push_back(TLI.expandFixedPointMul(Node, DAG)); break; case ISD::ADDCARRY: @@ -3662,6 +3652,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::BUILD_VECTOR: Results.push_back(ExpandBUILD_VECTOR(Node)); break; + case ISD::SPLAT_VECTOR: + Results.push_back(ExpandSPLAT_VECTOR(Node)); + break; case ISD::SRA: case ISD::SRL: case ISD::SHL: { @@ -3715,6 +3708,33 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; } + if (Results.empty() && Node->isStrictFPOpcode()) { + // FIXME: We were asked to expand a strict floating-point operation, + // but there is currently no expansion implemented that would preserve + // the "strict" properties. 
For now, we just fall back to the non-strict + // version if that is legal on the target. The actual mutation of the + // operation will happen in SelectionDAGISel::DoInstructionSelection. + switch (Node->getOpcode()) { + default: + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) + return true; + break; + case ISD::STRICT_LRINT: + case ISD::STRICT_LLRINT: + case ISD::STRICT_LROUND: + case ISD::STRICT_LLROUND: + // These are registered by the operand type instead of the value + // type. Reflect that here. + if (TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getOperand(1).getValueType()) + == TargetLowering::Legal) + return true; + break; + } + } + // Replace the original node with the legalized result. if (Results.empty()) { LLVM_DEBUG(dbgs() << "Cannot expand node\n"); @@ -3956,6 +3976,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128)); break; + case ISD::LROUND: + case ISD::STRICT_LROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32, + RTLIB::LROUND_F64, RTLIB::LROUND_F80, + RTLIB::LROUND_F128, + RTLIB::LROUND_PPCF128)); + break; + case ISD::LLROUND: + case ISD::STRICT_LLROUND: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32, + RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, + RTLIB::LLROUND_F128, + RTLIB::LLROUND_PPCF128)); + break; + case ISD::LRINT: + case ISD::STRICT_LRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32, + RTLIB::LRINT_F64, RTLIB::LRINT_F80, + RTLIB::LRINT_F128, + RTLIB::LRINT_PPCF128)); + break; + case ISD::LLRINT: + case ISD::STRICT_LLRINT: + Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32, + RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, + RTLIB::LLRINT_F128, + RTLIB::LLRINT_PPCF128)); + break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b4849b2881e6..72d052473f11 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -42,10 +42,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Convert Float Results to Integer for Non-HW-supported Operations. +// Convert Float Results to Integer //===----------------------------------------------------------------------===// -bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -58,26 +58,18 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); - case ISD::Register: - case ISD::CopyFromReg: - case ISD::CopyToReg: - assert(isLegalInHWReg(N->getValueType(ResNo)) && - "Unsupported SoftenFloatRes opcode!"); - // Only when isLegalInHWReg, we can skip check of the operands. 
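Two threads run through the strict-FP hunks above: the lround/llround/lrint/llrint expansions move from ExpandNode to ConvertNodeToLibcall so the STRICT_ forms can share the same libcall tables, and because those nodes produce an integer, their legalization action is registered on the FP operand type rather than the result type. A sketch of the resulting query rule (hypothetical helper name; operand 0 of a strict node is the chain):

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static TargetLowering::LegalizeAction
queryStrictAction(const TargetLowering &TLI, const SDNode *Node) {
  switch (Node->getOpcode()) {
  case ISD::STRICT_LRINT:
  case ISD::STRICT_LLRINT:
  case ISD::STRICT_LROUND:
  case ISD::STRICT_LLROUND:
    // Registered by input type: operand 1 is the FP value.
    return TLI.getStrictFPOperationAction(
        Node->getOpcode(), Node->getOperand(1).getValueType());
  default:
    // Everything else is keyed on the result type.
    return TLI.getStrictFPOperationAction(Node->getOpcode(),
                                          Node->getValueType(0));
  }
}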
- R = SDValue(N, ResNo); - break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -89,7 +81,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -102,30 +94,24 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; } - if (R.getNode() && R.getNode() != N) { + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) { + assert(R.getNode() != N); SetSoftenedFloat(SDValue(N, ResNo), R); - // Return true only if the node is changed, assuming that the operands - // are also converted when necessary. - return true; } - - // Otherwise, return false to tell caller to scan operands. - return false; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { return BitConvertToInteger(N->getOperand(0)); } @@ -144,10 +130,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, we can load better from the constant pool. 
- if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) { ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); // In ppcf128, the high 64 bits are always first in memory regardless // of Endianness. LLVM's APFloat representation is not Endian sensitive, @@ -172,19 +155,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, keep the extracted value in register. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FABS can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -200,57 +177,69 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, RTLIB::FMIN_F80, RTLIB::FMIN_F128, RTLIB::FMIN_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMAX_F32, RTLIB::FMAX_F64, RTLIB::FMAX_F80, RTLIB::FMAX_F128, RTLIB::FMAX_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
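From here down, every SoftenFloatRes_* gains the same boilerplate, so it is worth reading once in isolation: makeLibCall's old bare isSigned flag becomes a MakeLibCallOptions bundle, and setTypeListBeforeSoften records the pre-softening operand/result types so the target can lower the call against the original FP signature. Sketch for a binary node (hypothetical helper; note the OpsVT array must stay alive across the makeLibCall call, since MakeLibCallOptions holds an ArrayRef into it):

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static SDValue softenBinaryLibcall(const TargetLowering &TLI,
                                   SelectionDAG &DAG, RTLIB::Libcall LC,
                                   EVT NVT, SDNode *N,
                                   ArrayRef<SDValue> SoftenedOps) {
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = {N->getOperand(0).getValueType(),
                  N->getOperand(1).getValueType()};
  // Final 'true' marks the list as the pre-softening ("before soften") types.
  CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
  return TLI.makeLibCall(DAG, LC, NVT, SoftenedOps, CallOptions, SDLoc(N))
      .first;
}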
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -301,98 +290,123 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { 
N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, RTLIB::LOG2_F80, RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, RTLIB::LOG10_F64, RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -400,48 +414,57 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[3] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType(), + N->getOperand(2).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue 
DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { - // When LegalInHWReg, FNEG can be implemented as native bitwise operations. - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); - EVT FloatVT = N->getValueType(ResNo); + EVT FloatVT = N->getValueType(0); if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) { // Expand Y = FNEG(X) -> Y = X ^ sign mask APInt SignMask = APInt::getSignMask(NVT.getSizeInBits()); @@ -452,13 +475,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, dl).first; + NVT, Ops, CallOptions, dl).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -485,7 +509,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -493,15 +520,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, - false, SDLoc(N)).first; + CallOptions, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) return Res32; EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; + return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -515,20 +545,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { 
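The FNEG softening above avoids a __subsf3/__subdf3-style libcall for f32/f64/f128 by flipping the IEEE sign bit with an integer XOR (APInt::getSignMask supplies the mask); only the remaining types fall back to SUB -0.0, X. A scalar illustration of the same trick for f64 (plain C++, not part of the patch):

#include <cstdint>
#include <cstdio>
#include <cstring>

static double fnegViaXor(double X) {
  uint64_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits)); // type-pun safely via memcpy
  Bits ^= UINT64_C(1) << 63;            // the f64 sign-bit mask
  std::memcpy(&X, &Bits, sizeof(Bits));
  return X;
}

int main() {
  printf("%g %g\n", fnegViaXor(1.5), fnegViaXor(-0.0)); // prints: -1.5 0
  return 0;
}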
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -536,87 +573,111 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { "Unsupported power type!"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::REM_F32, RTLIB::REM_F64, RTLIB::REM_F80, RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, RTLIB::RINT_F64, RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, 
GetFPLibCall(N->getValueType(0), RTLIB::ROUND_F32, RTLIB::ROUND_F64, RTLIB::ROUND_F80, RTLIB::ROUND_F128, RTLIB::ROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { N->getOperand(0).getValueType(), + N->getOperand(1).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, false, SDLoc(N)).first; + NVT, Ops, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -625,17 +686,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { - bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast<LoadSDNode>(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -666,23 +729,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { // use the new one. 
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); - if (LegalInHWReg) - return ExtendNode; return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { - if (isLegalInHWReg(N->getValueType(ResNo))) - return SDValue(N, ResNo); +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -736,14 +793,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(Signed); + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - Op, Signed, dl).first; + Op, CallOptions, dl).first; } //===----------------------------------------------------------------------===// -// Convert Float Operand to Integer for Non-HW-supported Operations. +// Convert Float Operand to Integer //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -753,8 +814,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: - if (CanSkipSoftenFloatOperand(N, OpNo)) - return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -762,11 +821,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { llvm_unreachable("Do not know how to soften this operator's operand!"); case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; - case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; - case ISD::FABS: Res = SoftenFloatOp_FABS(N); break; - case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break; - case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break; case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; @@ -776,19 +831,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break; case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break; case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break; - case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: - Res = SoftenFloatOp_STORE(N, OpNo); - // Do not try to analyze or soften this node again if the value is - // or can be held in a register. In that case, Res.getNode() should - // be equal to N. 
- if (Res.getNode() == N && - isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - // Otherwise, we need to reanalyze and lower the new Res nodes. - break; + case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; } // If the result is null, the sub-method took care of registering results etc. @@ -800,60 +845,16 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return true; assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && - "Invalid operand expansion"); + "Invalid operand promotion"); ReplaceValueWith(SDValue(N, 0), Res); return false; } -bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { - if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) - return false; - - // When the operand type can be kept in registers there is nothing to do for - // the following opcodes. - switch (N->getOperand(OpNo).getOpcode()) { - case ISD::BITCAST: - case ISD::ConstantFP: - case ISD::CopyFromReg: - case ISD::CopyToReg: - case ISD::FABS: - case ISD::FCOPYSIGN: - case ISD::FNEG: - case ISD::Register: - case ISD::SELECT: - case ISD::SELECT_CC: - return true; - } - - switch (N->getOpcode()) { - case ISD::ConstantFP: // Leaf node. - case ISD::CopyFromReg: // Operand is a register that we know to be left - // unchanged by SoftenFloatResult(). - case ISD::Register: // Leaf node. - return true; - } - return false; -} - SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { - return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), - GetSoftenedFloat(N->getOperand(0))); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - if (N->getNumOperands() == 3) - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0); + SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2, - N->getOperand(3)), - 0); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { @@ -868,7 +869,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } @@ -885,7 +889,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -895,7 +902,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, 
NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(2), N->getOperand(3)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -911,34 +919,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } -SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) { - SDValue Op0 = GetSoftenedFloat(N->getOperand(0)); - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - - if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0); -} - -SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) { - SDValue Op = GetSoftenedFloat(N->getOperand(0)); - - if (Op == N->getOperand(0)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, Op), 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool Signed = N->getOpcode() == ISD::FP_TO_SINT; EVT SVT = N->getOperand(0).getValueType(); @@ -962,23 +942,15 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; // Truncate the result if the libcall returns a larger type. return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); } -SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) { - SDValue Op1 = GetSoftenedFloat(N->getOperand(1)); - SDValue Op2 = GetSoftenedFloat(N->getOperand(2)); - - if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2)) - return SDValue(); - - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), - 0); -} - SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); @@ -986,7 +958,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1009,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, use it. 
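// (For instance, a predicate with a single matching libcall, such as an
// f32 SETOLT, typically comes back as the triple (__ltsf2(a, b), 0, SETLT)
// to be re-compared below, while a predicate that needs two libcalls,
// e.g. SETUEQ via __unordsf2 and __eqsf2, is already OR-combined into one
// boolean and NewRHS is left null.)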
if (!NewRHS.getNode()) { @@ -1047,13 +1021,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { @@ -1061,13 +1038,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { @@ -1075,13 +1055,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { @@ -1089,13 +1072,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[1] = { N->getOperand(0).getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - NVT, Op, false, SDLoc(N)).first; + NVT, Op, CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -1267,13 +1253,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1341,13 +1328,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = 
TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1355,13 +1343,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1470,13 +1459,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + TargetLowering::MakeLibCallOptions CallOptions; SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, false, + N->getValueType(0), Ops, CallOptions, SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1555,7 +1545,9 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1732,7 +1724,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1741,8 +1734,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), - false, dl).first; + CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { @@ -1807,49 +1801,53 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, RTLIB::LROUND_F80, RTLIB::LROUND_F128, RTLIB::LROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, 
RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, RTLIB::LLROUND_F80, RTLIB::LLROUND_F128, RTLIB::LLROUND_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, RTLIB::LRINT_F80, RTLIB::LRINT_F128, RTLIB::LRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) { EVT RVT = N->getValueType(0); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; + TargetLowering::MakeLibCallOptions CallOptions; return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, RTLIB::LLRINT_F80, RTLIB::LLRINT_F128, RTLIB::LLRINT_PPCF128), - RVT, N->getOperand(0), false, SDLoc(N)).first; + RVT, N->getOperand(0), CallOptions, SDLoc(N)).first; } //===----------------------------------------------------------------------===// @@ -2002,6 +2000,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { dbgs() << "\n"); SDValue R = SDValue(); + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) { + LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n"); + return; + } + switch (N->getOpcode()) { // These opcodes cannot appear if promotion of FP16 is done in the backend // instead of Clang diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 15ac45c37c66..d5c1b539adbd 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_BUILD_VECTOR(N); break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntRes_SPLAT_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = PromoteIntRes_CONCAT_VECTORS(N); break; @@ -112,6 +114,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; @@ -148,9 +152,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::UADDSAT: case ISD::SSUBSAT: case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break; + case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break; + case ISD::ABS: Res = PromoteIntRes_ABS(N); break; case ISD::ATOMIC_LOAD: @@ -494,7 +501,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; - SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + if (N->getOpcode() == ISD::STRICT_FP_TO_UINT && + !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) + NewOpc = ISD::STRICT_FP_TO_SINT; + + SDValue Res; + if (N->isStrictFPOpcode()) { + Res = 
DAG.getNode(NewOpc, dl, { NVT, MVT::Other }, + { N->getOperand(0), N->getOperand(1) }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + } else + Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); // Assert that the converted value fits in the original type. If it doesn't // (eg: because the value being converted is too big), then the result of the @@ -503,7 +523,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example: // before legalization: fp-to-uint16, 65534. -> 0xfffe // after legalization: fp-to-sint32, 65534. -> 0x0000fffe - return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? + return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || + N->getOpcode() == ISD::STRICT_FP_TO_UINT) ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, DAG.getValueType(N->getValueType(0).getScalarType())); } @@ -590,7 +611,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { N->getIndex(), N->getScale() }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -623,48 +644,84 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) { - // For promoting iN -> iM, this can be expanded by - // 1. ANY_EXTEND iN to iM - // 2. SHL by M-N - // 3. [US][ADD|SUB]SAT - // 4. L/ASHR by M-N + // If the promoted type is legal, we can convert this to: + // 1. ANY_EXTEND iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // Else it is more efficient to convert this to a min and a max + // operation in the higher precision arithmetic. 
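// (Worked example, assuming an i8 saddsat promoted to i32: the shift
// strategy computes ((x << 24) saddsat (y << 24)) and arithmetic-shifts
// the result back down by 24, so the i8 sign bit saturates as the i32
// sign bit; the min/max strategy instead sign-extends x and y, adds them
// exactly in i32, and clamps the sum to [-128, 127] with SMIN/SMAX.)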
SDLoc dl(N); SDValue Op1 = N->getOperand(0); SDValue Op2 = N->getOperand(1); unsigned OldBits = Op1.getScalarValueSizeInBits(); unsigned Opcode = N->getOpcode(); - unsigned ShiftOp; - switch (Opcode) { - case ISD::SADDSAT: - case ISD::SSUBSAT: - ShiftOp = ISD::SRA; - break; - case ISD::UADDSAT: - case ISD::USUBSAT: - ShiftOp = ISD::SRL; - break; - default: - llvm_unreachable("Expected opcode to be signed or unsigned saturation " - "addition or subtraction"); - } - - SDValue Op1Promoted = GetPromotedInteger(Op1); - SDValue Op2Promoted = GetPromotedInteger(Op2); + SDValue Op1Promoted, Op2Promoted; + if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) { + Op1Promoted = ZExtPromotedInteger(Op1); + Op2Promoted = ZExtPromotedInteger(Op2); + } else { + Op1Promoted = SExtPromotedInteger(Op1); + Op2Promoted = SExtPromotedInteger(Op2); + } EVT PromotedType = Op1Promoted.getValueType(); unsigned NewBits = PromotedType.getScalarSizeInBits(); - unsigned SHLAmount = NewBits - OldBits; - EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); - SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); - Op1Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); - Op2Promoted = - DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); - SDValue Result = - DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); - return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + unsigned ShiftOp; + switch (Opcode) { + case ISD::SADDSAT: + case ISD::SSUBSAT: + ShiftOp = ISD::SRA; + break; + case ISD::UADDSAT: + case ISD::USUBSAT: + ShiftOp = ISD::SRL; + break; + default: + llvm_unreachable("Expected opcode to be signed or unsigned saturation " + "addition or subtraction"); + } + + unsigned SHLAmount = NewBits - OldBits; + EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout()); + SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT); + Op1Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount); + Op2Promoted = + DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount); + + SDValue Result = + DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount); + } else { + if (Opcode == ISD::USUBSAT) { + SDValue Max = + DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted); + } + + if (Opcode == ISD::UADDSAT) { + APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Add = + DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); + return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax); + } + + unsigned AddOp = Opcode == ISD::SADDSAT ? 
ISD::ADD : ISD::SUB; + APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits); + APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits); + SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType); + SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); + SDValue Result = + DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted); + Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax); + Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin); + return Result; + } } SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { @@ -673,6 +730,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) { SDValue Op1Promoted, Op2Promoted; bool Signed = N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = + N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT; if (Signed) { Op1Promoted = SExtPromotedInteger(N->getOperand(0)); Op2Promoted = SExtPromotedInteger(N->getOperand(1)); @@ -685,7 +744,6 @@ unsigned DiffSize = PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits(); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; if (Saturating) { // Promoting the operand and result values changes the saturation width, // which extends the values that we clamp to on saturation. This could be @@ -1110,6 +1168,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; case ISD::SCALAR_TO_VECTOR: Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::SPLAT_VECTOR: + Res = PromoteIntOp_SPLAT_VECTOR(N); break; case ISD::VSELECT: case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; @@ -1148,7 +1208,8 @@ case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break; case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break; @@ -1339,6 +1400,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { GetPromotedInteger(N->getOperand(0))), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) { + // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the + // operand in place. + return SDValue( + DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { assert(OpNo == 0 && "Only know how to promote the condition!"); SDValue Cond = N->getOperand(0); @@ -1454,8 +1522,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, EVT DataVT = N->getValueType(0); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used.
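// (Example: an i32 index vector promoted to i64 on a 64-bit target. A
// signed index of -5 must stay -5 after promotion, which only sign
// extension guarantees; zero extension would turn it into a huge positive
// offset. Unsigned indices take the ZExt path below for the same reason.)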
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); @@ -1470,8 +1542,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, EVT DataVT = N->getValue().getValueType(); NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); } else if (OpNo == 4) { - // Need to sign extend the index since the bits will likely be used. - NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + // The Index + if (N->isIndexSigned()) + // Need to sign extend the index since the bits will likely be used. + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + else + NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo)); } else NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); @@ -1715,7 +1791,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break; + case ISD::UMULFIX: + case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break; case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -2473,7 +2550,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2488,7 +2567,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2514,7 +2594,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2540,7 +2622,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo, SDLoc dl(N); EVT RetVT = N->getValueType(0); - SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first, Lo, Hi); } @@ -2743,7 +2827,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, } SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } @@ -2777,38 +2863,53 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); uint64_t Scale = N->getConstantOperandVal(2); - bool Saturating = N->getOpcode() == ISD::SMULFIXSAT; - EVT 
BoolVT = getSetCCResultType(VT); - SDValue Zero = DAG.getConstant(0, dl, VT); + bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT || + N->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (N->getOpcode() == ISD::SMULFIX || + N->getOpcode() == ISD::SMULFIXSAT); + + // Handle special case when scale is equal to zero. if (!Scale) { SDValue Result; if (!Saturating) { Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); } else { - Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); + EVT BoolVT = getSetCCResultType(VT); + unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO; + Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS); SDValue Product = Result.getValue(0); SDValue Overflow = Result.getValue(1); - - APInt MinVal = APInt::getSignedMinValue(VTSize); - APInt MaxVal = APInt::getSignedMaxValue(VTSize); - SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); - Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + if (Signed) { + APInt MinVal = APInt::getSignedMinValue(VTSize); + APInt MaxVal = APInt::getSignedMaxValue(VTSize); + SDValue SatMin = DAG.getConstant(MinVal, dl, VT); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + Result = DAG.getSelect(dl, VT, Overflow, Result, Product); + } else { + // For unsigned multiplication, we only need to check the max since we + // can't really overflow towards zero. + APInt MaxVal = APInt::getMaxValue(VTSize); + SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); + Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product); + } } SplitInteger(Result, Lo, Hi); return; } + // For SMULFIX[SAT] we only expect to find Scale<VTSize, but this assert will + // cover for unhandled cases below, while still being valid for UMULFIX[SAT]. + assert(Scale <= VTSize && "Scale can't be larger than the value type size."); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue LL, LH, RL, RH; GetExpandedInteger(LHS, LL, LH); GetExpandedInteger(RHS, RL, RH); SmallVector<SDValue, 4> Result; - bool Signed = (N->getOpcode() == ISD::SMULFIX || - N->getOpcode() == ISD::SMULFIXSAT); unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG, TargetLowering::MulExpansionKind::OnlyLegalOrCustom, @@ -2822,19 +2923,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, "the size of the current value type"); EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - // Shift whole amount by scale. - SDValue ResultLL = Result[0]; - SDValue ResultLH = Result[1]; - SDValue ResultHL = Result[2]; - SDValue ResultHH = Result[3]; - - SDValue SatMax, SatMin; - SDValue NVTZero = DAG.getConstant(0, dl, NVT); - SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); - EVT BoolNVT = getSetCCResultType(NVT); - - // After getting the multplication result in 4 parts, we need to perform a + // After getting the multiplication result in 4 parts, we need to perform a // shift right by the amount of the scale to get the result in that scale. + // // Let's say we multiply 2 64 bit numbers. 
The resulting value can be held in // 128 bits that are cut into 4 32-bit parts: // // |NVTSize-| // - // The resulting Lo and Hi will only need to be one of these 32-bit parts - // after shifting. + // The resulting Lo and Hi would normally be in LL and LH after the shift. But + // to avoid unnecessary shifting of all 4 parts, we can adjust the shift + // amount and get Lo and Hi using two funnel shifts. Or for the special case + // when Scale is a multiple of NVTSize we can just pick the result without + // shifting. + uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed. + if (Scale % NVTSize) { + SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy); + Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0], + ShiftAmount); + Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1], + ShiftAmount); + } else { + Lo = Result[Part0]; + Hi = Result[Part0 + 1]; + } + + // Unless saturation is requested we are done. The result is in <Hi,Lo>. + if (!Saturating) + return; + + // Cannot overflow when there is no integer part. + if (Scale == VTSize) + return; + + // To handle saturation we must check for overflow in the multiplication. + // + // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result) + // aren't all zeroes. + // + // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result) + // aren't all ones or all zeroes. + // + // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the + // highest bit of HH determines saturation direction in the event of signed + // saturation. + + SDValue ResultHL = Result[2]; + SDValue ResultHH = Result[3]; + + SDValue SatMax, SatMin; + SDValue NVTZero = DAG.getConstant(0, dl, NVT); + SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT); + EVT BoolNVT = getSetCCResultType(NVT); + + if (!Signed) { + if (Scale < NVTSize) { + // Overflow happened if ((HH | (HL >> Scale)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale, dl, ShiftTy)); + SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH); + SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE); + } else if (Scale == NVTSize) { + // Overflow happened if (HH != 0). + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE); + } else if (Scale < VTSize) { + // Overflow happened if ((HH >> (Scale - NVTSize)) != 0). + SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, + DAG.getConstant(Scale - NVTSize, dl, + ShiftTy)); + SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE); + } else + llvm_unreachable("Scale must be less than or equal to VTSize for UMULFIXSAT " "(and saturation can't happen with Scale==VTSize)."); + + Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo); + return; + } + if (Scale < NVTSize) {
- SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - - // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the - // highest bit of HH determines saturation direction in the event of - // saturation. // The number of overflow bits we can check are VTSize - Scale + 1 (we // include the sign bit). If these top bits are > 0, then we overflowed past // the max value. If these top bits are < -1, then we overflowed past the // min value. Otherwise, we did not overflow. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - assert(OverflowBits <= VTSize && OverflowBits > NVTSize && - "Extent of overflow bits must start within HL"); - SDValue HLHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); - SDValue HLLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); - - // HH > 0 or HH == 0 && HL > HLLoMask - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLPos = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos)); - - // HH < -1 or HH == -1 && HL < HLHiMask - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLNeg = - DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg)); - } + unsigned OverflowBits = VTSize - Scale + 1; + assert(OverflowBits <= VTSize && OverflowBits > NVTSize && + "Extent of overflow bits must start within HL"); + SDValue HLHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT); + SDValue HLLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT); + // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT)); + // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask). + SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT)); } else if (Scale == NVTSize) { - // If the scales are equal, Lo and Hi are ResultLH and Result HL, - // respectively. Avoid shifting to prevent undefined behavior. - Lo = ResultLH; - Hi = ResultHL; - - // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1. - // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0. 
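// (To see why these conditions are exact, take a 64-bit smulfixsat with
// Scale == 32 expanded into 32-bit parts: the kept result <HL,LH> is the
// 128-bit product shifted right by 32, so it is representable iff the 33
// bits above it, HH together with the sign bit of HL, form a pure sign
// extension; any other pattern means the true product lies outside the
// signed 64-bit range and must be clamped in the direction given by HH.)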
- if (Saturating) { - SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); - SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); - SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); - SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos, - DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg)); - - SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); - SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); - SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); - SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg, - DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos)); - } + // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1). + SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT); + SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ); + SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT); + SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg)); + // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0). + SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT); + SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ); + SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE); + SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT, + DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos)); } else if (Scale < VTSize) { - // If the scale is instead less than the old VT size, but greater than or - // equal to the expanded VT size, the first part of the result (ResultLL) is - // no longer a part of Lo because it would be scaled out anyway. Instead we - // can start shifting right from the fourth part (ResultHH) to the second - // part (ResultLH), and Result LH will be the new Lo. - SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy); - SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy); - Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt); - Lo = DAG.getNode(ISD::OR, dl, NVT, Lo, - DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt)); - Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt); - Hi = DAG.getNode(ISD::OR, dl, NVT, Hi, - DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt)); - // This is similar to the case when we saturate if Scale < NVTSize, but we - // only need to chech HH. - if (Saturating) { - unsigned OverflowBits = VTSize - Scale + 1; - SDValue HHHiMask = DAG.getConstant( - APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); - SDValue HHLoMask = DAG.getConstant( - APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); - - SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); - SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); - } - } else if (Scale == VTSize) { - assert( - !Signed && - "Only unsigned types can have a scale equal to the operand bit width"); - - Lo = ResultHL; - Hi = ResultHH; - } else { - llvm_unreachable("Expected the scale to be less than or equal to the width " - "of the operands"); - } + // only need to check HH. 
+ unsigned OverflowBits = VTSize - Scale + 1; + SDValue HHHiMask = DAG.getConstant( + APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT); + SDValue HHLoMask = DAG.getConstant( + APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT); + SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT); + SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT); + } else + llvm_unreachable("Illegal scale for signed fixed point mul."); - if (Saturating) { - APInt LHMax = APInt::getSignedMaxValue(NVTSize); - APInt LLMax = APInt::getAllOnesValue(NVTSize); - APInt LHMin = APInt::getSignedMinValue(NVTSize); - Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi); - Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi); - Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo); - Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); - } + // Saturate to signed maximum. + APInt MaxHi = APInt::getSignedMaxValue(NVTSize); + APInt MaxLo = APInt::getAllOnesValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo); + // Saturate to signed minimum. + APInt MinHi = APInt::getSignedMinValue(NVTSize); + Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi); + Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo); } void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, @@ -3030,7 +3133,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -3129,7 +3234,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); return; } @@ -3217,7 +3324,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -3373,7 +3482,8 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -3399,7 +3509,8 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); + TargetLowering::MakeLibCallOptions CallOptions; + 
SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -3759,7 +3870,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -3924,7 +4037,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); + return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { @@ -4033,6 +4148,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); } +SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) { + SDLoc dl(N); + + SDValue SplatVal = N->getOperand(0); + + assert(!SplatVal.getValueType().isVector() && "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "Type must be promoted to a vector type"); + EVT NOutElemVT = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal); + + return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op); +} + SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDLoc dl(N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 14fd5be23ccb..b596c174a287 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -81,7 +81,6 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { SDValue Res(&Node, i); - EVT VT = Res.getValueType(); bool Failed = false; // Don't create a value in map. auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0; @@ -135,17 +134,13 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { dbgs() << "Unprocessed value in a map!"; Failed = true; } - } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) { + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { if (Mapped > 1) { dbgs() << "Value with legal type was transformed!"; Failed = true; } } else { - // If the value can be kept in HW registers, softening machinery can - // leave it unchanged and don't put it to any map. - if (Mapped == 0 && - !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat && - isLegalInHWReg(VT))) { + if (Mapped == 0) { dbgs() << "Processed value not in any map!"; Failed = true; } else if (Mapped & (Mapped - 1)) { @@ -257,13 +252,9 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - Changed = SoftenFloatResult(N, i); - if (Changed) - goto NodeDone; - // If not changed, the result type should be legally in register. 
- assert(isLegalInHWReg(ResultVT) && - "Unchanged SoftenFloatResult should be legal in register!"); - goto ScanOperands; + SoftenFloatResult(N, i); + Changed = true; + goto NodeDone; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -439,15 +430,9 @@ NodeDone: bool Failed = false; // Check that all result types are legal. - // A value type is illegal if its TypeAction is not TypeLegal, - // and TLI.RegClassForVT does not have a register class for this type. - // For example, the x86_64 target has f128 that is not TypeLegal, - // to have softened operators, but it also has FR128 register class to - // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i)) && - !TLI.isTypeLegal(Node.getValueType(i))) { + if (!isTypeLegal(Node.getValueType(i))) { dbgs() << "Result type " << i << " illegal: "; Node.dump(&DAG); Failed = true; @@ -456,8 +441,7 @@ NodeDone: // Check that all operand types are legal. for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType()) && - !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + !isTypeLegal(Node.getOperand(i).getValueType())) { dbgs() << "Operand type " << i << " illegal: "; Node.getOperand(i).dump(&DAG); Failed = true; @@ -713,23 +697,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - // f128 of x86_64 could be kept in SSE registers, - // but sometimes softened to i128. - assert((Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for softened float"); AnalyzeNewValue(Result); auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; - // Allow repeated calls to save f128 type nodes - // or any node with type that transforms to itself. - // Many operations on these types are not softened. - assert(((OpIdEntry == 0) || - Op.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && - "Node is already converted to integer!"); + assert((OpIdEntry == 0) && "Node is already converted to integer!"); OpIdEntry = getTableId(Result); } @@ -1003,25 +977,27 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { /// Convert the node into a libcall with the same prototype. 
SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(isSigned); unsigned NumOps = N->getNumOperands(); SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions, dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions, dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } SmallVector<SDValue, 8> Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first; } /// Expand a node into a call to a libcall. Similar to ExpandLibCall except that diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1d489b1b3a33..4afbae69128a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -73,15 +73,6 @@ private: return VT.isSimple() && TLI.isTypeLegal(VT); } - /// Return true if this type can be passed in registers. - /// For example, x86_64's f128, should to be legally in registers - /// and only some operations converted to library calls or integer - /// bitwise operations. - bool isLegalInHWReg(EVT VT) const { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - return VT == NVT && isSimpleLegalType(VT); - } - EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -306,6 +297,7 @@ private: SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N); SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); @@ -363,6 +355,7 @@ private: SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); @@ -472,14 +465,11 @@ private: // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// Given an operand Op of Float type, returns the integer if the Op is not - /// supported in target HW and converted to the integer. - /// The integer contains exactly the same bits as Op - only the type changed. - /// For example, if Op is an f32 which was softened to an i32, then this - /// method returns an i32, the bits of which coincide with those of Op. - /// If the Op can be efficiently supported in target HW or the operand must - /// stay in a register, the Op is not converted to an integer. - /// In that case, the given op is returned. 
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an + /// integer of the same size, this returns the integer. The integer contains + /// exactly the same bits as Op - only the type changed. For example, if Op + /// is an f32 which was softened to an i32, then this method returns an i32, + /// the bits of which coincide with those of Op SDValue GetSoftenedFloat(SDValue Op) { TableId Id = getTableId(Op); auto Iter = SoftenedFloats.find(Id); @@ -494,19 +484,19 @@ private: } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Convert Float Results to Integer for Non-HW-supported Operations. - bool SoftenFloatResult(SDNode *N, unsigned ResNo); + // Convert Float Results to Integer. + void SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_ConstantFP(SDNode *N); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FABS(SDNode *N); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -518,7 +508,7 @@ private: SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FNEG(SDNode *N); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -531,27 +521,17 @@ private: SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_LOAD(SDNode *N); + SDValue SoftenFloatRes_SELECT(SDNode *N); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Return true if we can skip softening the given operand or SDNode because - // either it was soften before by SoftenFloatResult and references to the - // operand were replaced by ReplaceValueWith or it's value type is legal in HW - // registers and the operand can be left unchanged. - bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); - - // Convert Float Operand to Integer for Non-HW-supported Operations. + // Convert Float Operand to Integer. 
bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); - SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); - SDValue SoftenFloatOp_FABS(SDNode *N); - SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N); - SDValue SoftenFloatOp_FNEG(SDNode *N); SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); SDValue SoftenFloatOp_FP_ROUND(SDNode *N); SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); @@ -559,7 +539,6 @@ private: SDValue SoftenFloatOp_LLROUND(SDNode *N); SDValue SoftenFloatOp_LRINT(SDNode *N); SDValue SoftenFloatOp_LLRINT(SDNode *N); - SDValue SoftenFloatOp_SELECT(SDNode *N); SDValue SoftenFloatOp_SELECT_CC(SDNode *N); SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); @@ -715,6 +694,7 @@ private: bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); SDValue ScalarizeVecOp_UnaryOp(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_VSELECT(SDNode *N); @@ -830,6 +810,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); + SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo); SDValue WidenVecRes_Convert(SDNode *N); @@ -933,6 +914,8 @@ private: void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVSETCC(const SDNode *N); + //===--------------------------------------------------------------------===// // Generic Expansion: LegalizeTypesGeneric.cpp //===--------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 943f63f46c47..5562f400b6e1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -52,17 +52,11 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: { - // Expand the floating point operand only if it was converted to integers. - // Otherwise, it is a legal type like f128 that can be saved in a register. - auto SoftenedOp = GetSoftenedFloat(InOp); - if (isLegalInHWReg(SoftenedOp.getValueType())) - break; - SplitInteger(SoftenedOp, Lo, Hi); + case TargetLowering::TypeSoftenFloat: + SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; - } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); @@ -509,23 +503,6 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, GetSplitOp(Op, Lo, Hi); } -static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, - SelectionDAG &DAG) { - SDLoc DL(N); - EVT LoVT, HiVT; - std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); - - // Split the inputs. 
- SDValue Lo, Hi, LL, LH, RL, RH; - std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); - std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); - - Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); - Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); - - return std::make_pair(Lo, Hi); -} - void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH, CL, CH; SDLoc dl(N); @@ -537,16 +514,25 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { if (Cond.getValueType().isVector()) { if (SDValue Res = WidenVSELECTAndMask(N)) std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl); - // It seems to improve code to generate two narrow SETCCs as opposed to - // splitting a wide result vector. - else if (Cond.getOpcode() == ISD::SETCC) - std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG); // Check if there are already splitted versions of the vector available and // use those instead of splitting the mask operand again. else if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) GetSplitVector(Cond, CL, CH); - else + // It seems to improve code to generate two narrow SETCCs as opposed to + // splitting a wide result vector. + else if (Cond.getOpcode() == ISD::SETCC) { + // If the condition is a vXi1 vector, and the LHS of the setcc is a legal + // type and the setcc result type is the same vXi1, then leave the setcc + // alone. + EVT CondLHSVT = Cond.getOperand(0).getValueType(); + if (Cond.getValueType().getVectorElementType() == MVT::i1 && + isTypeLegal(CondLHSVT) && + getSetCCResultType(CondLHSVT) == Cond.getValueType()) + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); + else + SplitVecRes_SETCC(Cond.getNode(), CL, CH); + } else std::tie(CL, CH) = DAG.SplitVector(Cond, dl); } diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 10b8b705869e..15c3a0b6cfad 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -38,6 +38,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" #include "llvm/Support/MathExtras.h" @@ -333,14 +334,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_ROUND: case ISD::STRICT_FP_EXTEND: - // These pseudo-ops get legalized as if they were their non-strict - // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT - // is also legal, but if ISD::FSQRT requires expansion then so does - // ISD::STRICT_FSQRT. - Action = TLI.getStrictFPOperationAction(Node->getOpcode(), - Node->getValueType(0)); + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + // If we're asked to expand a strict vector floating-point operation, + // by default we're going to simply unroll it. That is usually the + // best approach, except in the case where the resulting strict (scalar) + // operations would themselves use the fallback mutation to non-strict. + // In that specific case, just do the fallback on the vector op. 
+ if (Action == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), + Node->getValueType(0)) + == TargetLowering::Legal) { + EVT EltVT = Node->getValueType(0).getVectorElementType(); + if (TLI.getOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Expand && + TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) + == TargetLowering::Legal) + Action = TargetLowering::Legal; + } break; case ISD::ADD: case ISD::SUB: @@ -439,16 +453,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { break; case ISD::SMULFIX: case ISD::SMULFIXSAT: - case ISD::UMULFIX: { + case ISD::UMULFIX: + case ISD::UMULFIXSAT: { unsigned Scale = Node->getConstantOperandVal(2); Action = TLI.getFixedPointOperationAction(Node->getOpcode(), Node->getValueType(0), Scale); break; } - case ISD::FP_ROUND_INREG: - Action = TLI.getOperationAction(Node->getOpcode(), - cast<VTSDNode>(Node->getOperand(1))->getVT()); - break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::VECREDUCE_ADD: @@ -820,6 +831,13 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::SMULFIX: case ISD::UMULFIX: return ExpandFixedPointMul(Op); + case ISD::SMULFIXSAT: + case ISD::UMULFIXSAT: + // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly + // why. Maybe it results in worse codegen compared to the unroll for some + // targets? This should probably be investigated. And if we still prefer to + // unroll an explanation could be helpful. + return DAG.UnrollVectorOp(Op.getNode()); case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -844,6 +862,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: return ExpandStrictFPOp(Op); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: @@ -1168,9 +1188,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) { SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) { // Attempt to expand using TargetLowering. - SDValue Result; - if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG)) + SDValue Result, Chain; + if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) { + if (Op.getNode()->isStrictFPOpcode()) + // Relink the chain + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain); return Result; + } // Otherwise go ahead and unroll. 
return DAG.UnrollVectorOp(Op.getNode()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7e4d52617977..3763e886cef2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -52,7 +52,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break; case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; - case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; @@ -171,6 +170,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FFLOOR: case ISD::STRICT_FROUND: case ISD::STRICT_FTRUNC: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: R = ScalarizeVecRes_StrictFPOp(N); break; @@ -185,6 +186,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: R = ScalarizeVecRes_MULFIX(N); break; } @@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::UINT_TO_FP: Res = ScalarizeVecOp_UnaryOp(N); break; + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: + Res = ScalarizeVecOp_UnaryOp_StrictFP(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); } +/// If the input is a vector that needs to be scalarized, it must be <1 x ty>. +/// Do the strict FP operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(1)); + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), + { N->getValueType(0).getScalarType(), MVT::Other }, + { N->getOperand(0), Elt }); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. 
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Ops(N->getNumOperands()); @@ -828,7 +851,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; - case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -883,7 +905,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::STRICT_FP_ROUND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FRINT: case ISD::FROUND: case ISD::FSIN: @@ -977,6 +1001,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: + case ISD::UMULFIXSAT: SplitVecRes_MULFIX(N, Lo, Hi); break; } @@ -1560,10 +1585,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MLD->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1622,10 +1651,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } EVT MemoryVT = MGT->getMemoryVT(); EVT LoMemVT, HiMemVT; @@ -1651,11 +1684,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, - MMO); + MMO, MGT->getIndexType()); SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, - MMO); + MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. 
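The gather hunks above make a pair of related changes: a mask that is itself a SETCC is now split by legalizing the SETCC directly (SplitVecRes_SETCC) rather than splitting its already-computed result, and each rebuilt half passes MGT->getIndexType() through getMaskedGather so the split nodes keep interpreting their index operand the way the original node did. The following standalone sketch illustrates what that ISD::MemIndexType distinguishes; the enumerator names are shortened here (in tree they are spelled SIGNED_SCALED and so on), and Scale stands for the scale operand the gather/scatter node already carries, so treat this as an illustration of the addressing contract rather than LLVM code.

#include <cstdint>

enum class MemIndexType { SignedScaled, SignedUnscaled,
                          UnsignedScaled, UnsignedUnscaled };

// Compute the address one lane of a gather/scatter touches, given how the
// index element is to be extended and whether it is pre-scaled.
uint64_t elementAddress(uint64_t Base, int32_t Index, uint64_t Scale,
                        MemIndexType Ty) {
  uint64_t SExt = static_cast<uint64_t>(static_cast<int64_t>(Index)); // sign-extend the index element
  uint64_t ZExt = static_cast<uint32_t>(Index);                       // zero-extend the index element
  switch (Ty) {
  case MemIndexType::SignedScaled:     return Base + SExt * Scale;
  case MemIndexType::SignedUnscaled:   return Base + SExt;
  case MemIndexType::UnsignedScaled:   return Base + ZExt * Scale;
  case MemIndexType::UnsignedUnscaled: return Base + ZExt;
  }
  return Base; // unreachable for the four values above
}

Threading the index type through the split is what keeps the Lo/Hi halves correct: if the original gather sign-extended and scaled its indices, both replacement gathers must do the same.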
@@ -1979,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -2293,7 +2328,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale}; SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, - OpsLo, MMO); + OpsLo, MMO, MGT->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(MGT->getPointerInfo(), @@ -2303,7 +2338,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale}; SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, - OpsHi, MMO); + OpsHi, MMO, MGT->getIndexType()); // Build a factor node to remember that this load is independent of the // other one. @@ -2340,12 +2375,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue Lo, Hi; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -2397,12 +2436,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, else std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + // Split Mask operand SDValue MaskLo, MaskHi; - if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) - // Split Mask operand - GetSplitVector(Mask, MaskLo, MaskHi); - else - std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + } SDValue IndexHi, IndexLo; if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) @@ -2418,7 +2461,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale}; Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), - DL, OpsLo, MMO); + DL, OpsLo, MMO, N->getIndexType()); MMO = DAG.getMachineFunction(). getMachineMemOperand(N->getPointerInfo(), @@ -2430,7 +2473,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, // after another. 
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), - DL, OpsHi, MMO); + DL, OpsHi, MMO, N->getIndexType()); } SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2596,7 +2639,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); - return PromoteTargetBoolean(Con, N->getValueType(0)); + + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con); } @@ -2663,7 +2710,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; - case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; @@ -2719,6 +2765,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_BinaryCanTrap(N); break; + case ISD::SMULFIX: + case ISD::SMULFIXSAT: + case ISD::UMULFIX: + case ISD::UMULFIXSAT: + // These are binary operations, but with an extra operand that shouldn't + // be widened (the scale). + Res = WidenVecRes_BinaryWithExtraScalarOp(N); + break; + case ISD::STRICT_FADD: case ISD::STRICT_FSUB: case ISD::STRICT_FMUL: @@ -2790,6 +2845,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FP_EXTEND: case ISD::STRICT_FP_ROUND: + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: Res = WidenVecRes_Convert_StrictFP(N); break; @@ -2866,6 +2923,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); } +SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { + // Binary op widening, but with an extra operand that shouldn't be widened. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = N->getOperand(2); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3, + N->getFlags()); +} + // Given a vector of operations that have been broken up to widen, see // if we can collect them together into the next widest legal VT. This // implementation is trap-safe. @@ -3716,7 +3784,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { Scale }; SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), N->getMemoryVT(), dl, Ops, - N->getMemOperand()); + N->getMemOperand(), N->getIndexType()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
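The SplitVecOp_VSETCC hunk just above (and the matching WidenVecOp_SETCC hunk further down) inlines PromoteTargetBoolean, and in doing so switches the getBooleanContents query from the setcc result type to the type of the operands being compared, which is what determines how the target encodes a vector true value. Both versions rest on the TargetLowering::getExtendForContent mapping; here is a minimal standalone paraphrase of it, with the enumerators renamed for brevity — an illustration, not the LLVM source.

#include <cstdlib>

enum class BooleanContent {
  Undefined,         // only bit 0 of a true/false value is meaningful
  ZeroOrOne,         // booleans are 0 or +1
  ZeroOrNegativeOne  // booleans are 0 or all-ones
};

enum class ExtendKind { AnyExtend, ZeroExtend, SignExtend };

// Pick the extension that widens a boolean without changing its meaning.
ExtendKind getExtendForContent(BooleanContent Content) {
  switch (Content) {
  case BooleanContent::Undefined:         return ExtendKind::AnyExtend;  // garbage high bits are fine
  case BooleanContent::ZeroOrOne:         return ExtendKind::ZeroExtend; // preserve 0 / +1
  case BooleanContent::ZeroOrNegativeOne: return ExtendKind::SignExtend; // preserve 0 / -1
  }
  std::abort(); // unreachable for the three values above
}

Whichever extend comes back is applied to the concatenated narrow SETCC results, producing a boolean vector with the layout the target expects.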
@@ -4094,7 +4162,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_EXTEND: case ISD::STRICT_FP_EXTEND: case ISD::FP_TO_SINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::STRICT_FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: @@ -4434,7 +4504,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, Scale}; SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, - MG->getMemOperand()); + MG->getMemOperand(), MG->getIndexType()); ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); return SDValue(); @@ -4472,7 +4542,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), SDLoc(N), Ops, - MSC->getMemOperand()); + MSC->getMemOperand(), MSC->getIndexType()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { @@ -4504,7 +4574,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); - return PromoteTargetBoolean(CC, VT); + EVT OpVT = N->getOperand(0).getValueType(); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, dl, VT, CC); } SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { @@ -4706,7 +4779,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; - unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads. + unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads. // Find the vector type that can load from. EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 2cb850fa1a3d..7ee44c808fcb 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -498,7 +498,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } else diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 34b4c8502353..ff806bdb822c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { if (!Pred.isArtificial()) AddPredQueued(NewSU, Pred); + // Make sure the clone comes after the original. (InstrEmitter assumes + // this ordering.) + AddPredQueued(NewSU, SDep(SU, SDep::Artificial)); + // Only copy scheduled successors. Cut them from old node's successor // list and move them over. SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; @@ -1374,7 +1378,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else @@ -2358,7 +2362,7 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { unsigned Reg = cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2379,7 +2383,7 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { unsigned Reg = cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { RetVal = true; continue; } @@ -2948,8 +2952,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && - TargetRegisterInfo::isVirtualRegister - (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; SDNode *PredFrameSetup = nullptr; @@ -2995,8 +2999,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && - TargetRegisterInfo::isVirtualRegister - (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + Register::isVirtualRegister( + cast<RegisterSDNode>(N->getOperand(1))->getReg())) continue; // Perform checks on the successors of PredSU. diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 568c6191e512..d4c1fb36475e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -115,7 +115,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, return; unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) return; unsigned ResNo = User->getOperand(2).getResNo(); @@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// glued together nodes with a single SUnit. -void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { +void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) { // Cluster certain nodes which should be scheduled together. ClusterNodes(); // Populate the SUnits array. @@ -656,7 +656,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty()) { unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) // This copy is a liveout value. It is likely coalesced, so reduce the // latency so not to penalize the def. // FIXME: need target specific adjustment here? @@ -808,7 +808,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, } else { // Copy from physical register. 
assert(I->getReg() && "Unknown physical register!"); - unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC); bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; (void)isNew; // Silence compiler warning. assert(isNew && "Node emitted out of order - early"); @@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Remember the source order of the inserted instruction. if (HasDbg) ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + + if (MDNode *MD = DAG->getHeapAllocSite(N)) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } + GluedNodes.pop_back(); } auto NewInsn = @@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { if (HasDbg) ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn); + if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) { + if (NewInsn && NewInsn->isCall()) + MF.addCodeViewHeapAllocSite(NewInsn, MD); + } } // Insert all the dbg_values which have not already been inserted in source diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 5163b4fa4fd3..183ce4b0652d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -26,6 +26,7 @@ namespace llvm { +class AAResults; class InstrItineraryData; /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. @@ -93,7 +94,7 @@ class InstrItineraryData; /// are input. This SUnit graph is similar to the SelectionDAG, but /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. - void BuildSchedGraph(AliasAnalysis *AA); + void BuildSchedGraph(AAResults *AA); /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index ab06b55b49fd..e7bac73678a7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -63,14 +63,13 @@ private: /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; - /// AA - AliasAnalysis for making memory reference queries. - AliasAnalysis *AA; + /// AA - AAResults for making memory reference queries. 
+ AAResults *AA; public: - ScheduleDAGVLIW(MachineFunction &mf, - AliasAnalysis *aa, + ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa, SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetSubtargetInfo &STI = mf.getSubtarget(); HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5852e693fa9f..52a71b91d93f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { break; case ISD::TargetExternalSymbol: { ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); - Erased = TargetExternalSymbols.erase( - std::pair<std::string,unsigned char>(ESN->getSymbol(), - ESN->getTargetFlags())); + Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>( + ESN->getSymbol(), ESN->getTargetFlags())); break; } case ISD::MCSymbol: { @@ -1084,6 +1083,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); + SDCallSiteDbgInfo.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast<CondCodeSDNode*>(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), @@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t Offset, bool isTargetGA, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); @@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { } SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; @@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, unsigned Alignment, int Offset, bool isTarget, - unsigned char TargetFlags) { + unsigned TargetFlags) { assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) @@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, } SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, - unsigned char TargetFlags) { + unsigned TargetFlags) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); ID.AddInteger(Index); @@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { } SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, - unsigned char TargetFlags) { + unsigned TargetFlags) { SDNode *&N = - TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, - TargetFlags)]; + TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)]; if (N) return SDValue(N, 0); N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT); InsertNode(N); @@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl, } SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, - int64_t Offset, - bool isTarget, - unsigned char TargetFlags) { + int64_t Offset, bool isTarget, + unsigned TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; @@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) { EVT VT = Node->getValueType(0); SDValue Tmp1 = Node->getOperand(0); SDValue Tmp2 = Node->getOperand(1); - unsigned Align = Node->getConstantOperandVal(3); + const MaybeAlign MA(Node->getConstantOperandVal(3)); SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, MachinePointerInfo(V)); SDValue VAList = VAListLoad; - if (Align > TLI.getMinStackArgumentAlignment()) { - assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); - + if (MA && *MA > TLI.getMinStackArgumentAlignment()) { VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, - getConstant(Align - 1, dl, VAList.getValueType())); + getConstant(MA->value() - 1, dl, VAList.getValueType())); - VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, - getConstant(-(int64_t)Align, dl, VAList.getValueType())); + VAList = + getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)MA->value(), dl, VAList.getValueType())); } // Increment the pointer, VAList, to the next vaarg @@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, } case ISD::OR: case ISD::XOR: - // If the LHS or RHS don't contribute bits to the or, drop them. 
- if (MaskedValueIsZero(V.getOperand(0), DemandedBits)) - return V.getOperand(1); - if (MaskedValueIsZero(V.getOperand(1), DemandedBits)) - return V.getOperand(0); - break; + case ISD::SIGN_EXTEND_INREG: + return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts, + *this, 0); case ISD::SRL: // Only look at single-use SRLs. if (!V.getNode()->hasOneUse()) @@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits, return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc); break; } - case ISD::SIGN_EXTEND_INREG: - EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT(); - unsigned ExVTBits = ExVT.getScalarSizeInBits(); - - // If none of the extended bits are demanded, eliminate the sextinreg. - if (DemandedBits.getActiveBits() <= ExVTBits) - return V.getOperand(0); - - break; } return SDValue(); } @@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) { /// If a SHL/SRA/SRL node has a constant or splat constant shift amount that /// is less than the element bit-width of the shift node, return it. static const APInt *getValidShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) { // Shifting more than the bitwidth is not valid. const APInt &ShAmt = SA->getAPIntValue(); - if (ShAmt.ult(V.getScalarValueSizeInBits())) + if (ShAmt.ult(BitWidth)) return &ShAmt; } return nullptr; } +/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less +/// than the element bit-width of the shift node, return the minimum value. +static const APInt *getValidMinimumShiftAmountConstant(SDValue V) { + unsigned BitWidth = V.getScalarValueSizeInBits(); + auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1)); + if (!BV) + return nullptr; + const APInt *MinShAmt = nullptr; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { + auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i)); + if (!SA) + return nullptr; + // Shifting more than the bitwidth is not valid. + const APInt &ShAmt = SA->getAPIntValue(); + if (ShAmt.uge(BitWidth)) + return nullptr; + if (MinShAmt && MinShAmt->ule(ShAmt)) + continue; + MinShAmt = &ShAmt; + } + return MinShAmt; +} + /// Determine which bits of Op are known to be either zero or one and return /// them in Known. For vectors, the known bits are those that are shared by /// every vector element. @@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, return Known; } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return Known; // Limit search depth. KnownBits Known2; @@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. 
uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - Known = computeKnownBits(Src, DemandedSrc, Depth + 1); - } else { - Known = computeKnownBits(Src, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } + Known = computeKnownBits(Src, DemandedSrc, Depth + 1); break; } case ISD::SCALAR_TO_VECTOR: { @@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.One.lshrInPlace(Shift); // High bits are known zero. Known.Zero.setHighBits(Shift); - } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) { - // If the shift amount is a vector of constants see if we can bound - // the number of upper zero bits. - unsigned ShiftAmountMin = BitWidth; - for (unsigned i = 0; i != BV->getNumOperands(); ++i) { - if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) { - const APInt &ShAmt = C->getAPIntValue(); - if (ShAmt.ult(BitWidth)) { - ShiftAmountMin = std::min<unsigned>(ShiftAmountMin, - ShAmt.getZExtValue()); - continue; - } - } - // Don't know anything. - ShiftAmountMin = 0; - break; - } - - Known.Zero.setHighBits(ShiftAmountMin); + } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) { + // Minimum shift high bits are known zero. + Known.Zero.setHighBits(ShMinAmt->getZExtValue()); } break; case ISD::SRA: @@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // If the first operand is non-negative or has all low bits zero, then // the upper bits are all zero. - if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits)) + if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero)) Known.Zero |= ~LowBits; // If the first operand is negative and not all low bits are zero, then // the upper bits are all one. - if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0)) + if (Known2.isNegative() && LowBits.intersects(Known2.One)) Known.One |= ~LowBits; assert((Known.Zero & Known.One) == 0&&"Bits known to be one AND zero?"); } @@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Val.getNumSignBits(); } - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. if (!DemandedElts) @@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; + case ISD::MUL: { + // The output of the Mul can be at most twice the valid bits in the inputs. + unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (SignBitsOp0 == 1) + break; + unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + if (SignBitsOp1 == 1) + break; + unsigned OutValidBits = + (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1); + return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1; + } case ISD::TRUNCATE: { // Check if the sign bits of source go down as far as the truncated value. 
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits(); @@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, SDValue Src = Op.getOperand(0); ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts); if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { // Offset the demanded elts by the subvector index. uint64_t Idx = SubIdx->getZExtValue(); - APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); - return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); + DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); } - return ComputeNumSignBits(Src, Depth + 1); + return ComputeNumSignBits(Src, DemandedSrc, Depth + 1); } case ISD::CONCAT_VECTORS: { // Determine the minimum number of sign bits across all demanded @@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs()) return true; - if (Depth == 6) + if (Depth >= MaxRecursionDepth) return false; // Limit search depth. // TODO: Handle vectors. @@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getUNDEF(VT); // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 - if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) && + if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) && OpOpcode == ISD::FSUB) return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1), Operand.getOperand(0), Flags); @@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (N2C && N2C->isNullValue()) return N1; break; - case ISD::FP_ROUND_INREG: { - EVT EVT = cast<VTSDNode>(N2)->getVT(); - assert(VT == N1.getValueType() && "Not an inreg round!"); - assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && - "Cannot FP_ROUND_INREG integer types"); - assert(EVT.isVector() == VT.isVector() && - "FP_ROUND_INREG type should be vector iff the operand " - "type is vector!"); - assert((!EVT.isVector() || - EVT.getVectorNumElements() == VT.getVectorNumElements()) && - "Vector element counts must match in FP_ROUND_INREG"); - assert(EVT.bitsLE(VT) && "Not rounding down!"); - (void)EVT; - if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. 
- break; - } case ISD::FP_ROUND: assert(VT.isFloatingPoint() && N1.getValueType().isFloatingPoint() && @@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, std::swap(N1, N2); } else { switch (Opcode) { - case ISD::FP_ROUND_INREG: case ISD::SIGN_EXTEND_INREG: case ISD::SUB: return getUNDEF(VT); // fold op(undef, arg2) -> undef @@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl, static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src, - uint64_t Size, unsigned Align, + uint64_t Size, unsigned Alignment, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { @@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; unsigned SrcAlign = DAG.InferPtrAlignment(Src); - if (Align > SrcAlign) - SrcAlign = Align; + if (Alignment > SrcAlign) + SrcAlign = Alignment; ConstantDataArraySlice Slice; bool CopyFromConstant = isMemSrcFromConstant(Src, Slice); bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr; unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); if (!TLI.findOptimalMemOpLowering( - MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), + MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment), (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant, /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), @@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, // realignment. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) - while (NewAlign > Align && - DL.exceedsNaturalStackAlignment(NewAlign)) - NewAlign /= 2; + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; - if (NewAlign > Align) { + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. 
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign) MFI.setObjectAlignment(FI->getIndex(), NewAlign); - Align = NewAlign; + Alignment = NewAlign; } } @@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, } Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice); if (Value.getNode()) { - Store = DAG.getStore(Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), Align, - MMOFlags); + Store = DAG.getStore( + Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), + DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags); OutChains.push_back(Store); } } @@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getTruncStore( Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl), - DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags); + DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags); OutStoreChains.push_back(Store); } SrcOff += VTSize; @@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) { SDValue SelectionDAG::getMemIntrinsicNode( unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, - MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) { + MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) { if (Align == 0) // Ensure that codegen never sees alignment 0 Align = getEVTAlignment(MemVT); @@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, createOperands(N, Ops); } InsertNode(N); - return SDValue(N, 0); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; } SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl, @@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { @@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, } auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getPassThru().getValueType() == N->getValueType(0) && @@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl, SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, ArrayRef<SDValue> Ops, - MachineMemOperand *MMO) { + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { assert(Ops.size() == 6 && "Incompatible number of operands"); FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); ID.AddInteger(VT.getRawBits()); ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>( - dl.getIROrder(), VTs, VT, MMO)); + dl.getIROrder(), VTs, VT, MMO, IndexType)); ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); void *IP = nullptr; if (SDNode *E = 
FindNodeOrInsertPos(ID, dl, IP)) { @@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl, return SDValue(E, 0); } auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), - VTs, VT, MMO); + VTs, VT, MMO, IndexType); createOperands(N, Ops); assert(N->getMask().getValueType().getVectorNumElements() == @@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break; case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break; case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break; + case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break; + case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break; case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break; case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break; case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break; case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break; case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break; case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break; + case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break; + case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break; case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break; case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break; case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break; case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break; + case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break; + case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break; } assert(Node->getNumValues() == 2 && "Unexpected number of results!"); @@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL, CSEMap.InsertNode(N, IP); InsertNode(N); + NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this); return N; } @@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad, // TokenFactor. SDValue OldChain = SDValue(OldLoad, 1); SDValue NewChain = SDValue(NewMemOp.getNode(), 1); - if (!OldLoad->hasAnyUseOfValue(1)) + if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1)) return NewChain; SDValue TokenFactor = @@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() { GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, const GlobalValue *GA, EVT VT, - int64_t o, unsigned char TF) + int64_t o, unsigned TF) : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, // Loads don't have side effects, look through them. if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { - if (!Ld->isVolatile()) + if (Ld->isUnordered()) return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); } return false; @@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) { SDValue SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, - ArrayRef<ISD::NodeType> CandidateBinOps) { + ArrayRef<ISD::NodeType> CandidateBinOps, + bool AllowPartials) { // The pattern must end in an extract from index 0. if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT || !isNullConstant(Extract->getOperand(1))) return SDValue(); - SDValue Op = Extract->getOperand(0); - unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); - // Match against one of the candidate binary ops. 
+ SDValue Op = Extract->getOperand(0); if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) { return Op.getOpcode() == unsigned(BinOp); })) return SDValue(); + // Floating-point reductions may require relaxed constraints on the final step + // of the reduction because they may reorder intermediate operations. + unsigned CandidateBinOp = Op.getOpcode(); + if (Op.getValueType().isFloatingPoint()) { + SDNodeFlags Flags = Op->getFlags(); + switch (CandidateBinOp) { + case ISD::FADD: + if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation()) + return SDValue(); + break; + default: + llvm_unreachable("Unhandled FP opcode for binop reduction"); + } + } + + // Matching failed - attempt to see if we did enough stages that a partial + // reduction from a subvector is possible. + auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) { + if (!AllowPartials || !Op) + return SDValue(); + EVT OpVT = Op.getValueType(); + EVT OpSVT = OpVT.getScalarType(); + EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts); + if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0)) + return SDValue(); + BinOp = (ISD::NodeType)CandidateBinOp; + return getNode( + ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op, + getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout()))); + }; + // At each stage, we're looking for something that looks like: // %s = shufflevector <8 x i32> %op, <8 x i32> undef, // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, @@ -9030,10 +9057,16 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // <4,5,6,7,u,u,u,u> // <2,3,u,u,u,u,u,u> // <1,u,u,u,u,u,u,u> - unsigned CandidateBinOp = Op.getOpcode(); + // While a partial reduction match would be: + // <2,3,u,u,u,u,u,u> + // <1,u,u,u,u,u,u,u> + unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements()); + SDValue PrevOp; for (unsigned i = 0; i < Stages; ++i) { + unsigned MaskEnd = (1 << i); + if (Op.getOpcode() != CandidateBinOp) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp, // The first operand of the shuffle should be the same as the other operand // of the binop. if (!Shuffle || Shuffle->getOperand(0) != Op) - return SDValue(); + return PartialReduction(PrevOp, MaskEnd); // Verify the shuffle has the expected (at this stage of the pyramid) mask. 
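// --- Sketch (editor's illustration, not part of this commit): why the
// FADD case above insists on the 'reassoc' and 'nsz' fast-math flags.
// The pyramid reorders the additions, and FP addition is not associative,
// so the matcher may only fire when the IR explicitly allowed reordering:
#include <cstdio>
int main() {
  float A = 1e20f, B = -1e20f, C = 1.0f;
  std::printf("%g vs %g\n", (A + B) + C, A + (B + C)); // prints "1 vs 0"
}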
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index) - if (Shuffle->getMaskElt(Index) != MaskEnd + Index) - return SDValue(); + for (int Index = 0; Index < (int)MaskEnd; ++Index) + if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index)) + return PartialReduction(PrevOp, MaskEnd); + + PrevOp = Op; } BinOp = (ISD::NodeType)CandidateBinOp; @@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { getShiftAmountOperand(Operands[0].getValueType(), Operands[1]))); break; - case ISD::SIGN_EXTEND_INREG: - case ISD::FP_ROUND_INREG: { + case ISD::SIGN_EXTEND_INREG: { EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], @@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, int Dist) const { if (LD->isVolatile() || Base->isVolatile()) return false; + // TODO: probably too restrictive for atomics, revisit + if (!LD->isSimple()) + return false; if (LD->isIndexed() || Base->isIndexed()) return false; if (LD->getChain() != Base->getChain()) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 9592bc30a4e1..3a53ab9717a4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" #include <cstdint> using namespace llvm; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e818dd27c05e..8c15563fcd23 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -833,7 +833,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. - if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || + if (!Register::isVirtualRegister(Regs[Part + i]) || !RegisterVT.isInteger()) continue; @@ -948,8 +948,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); if (HasMatching) Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - else if (!Regs.empty() && - TargetRegisterInfo::isVirtualRegister(Regs.front())) { + else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline // assembly as well as normal instructions. @@ -1810,7 +1809,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // offsets to its parts don't wrap either. 
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]); - SDValue Val = RetOp.getValue(i); + SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]); Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val, @@ -2263,7 +2262,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { Instruction::BinaryOps Opcode = BOp->getOpcode(); if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && - !I.getMetadata(LLVMContext::MD_unpredictable) && + !I.hasMetadata(LLVMContext::MD_unpredictable) && (Opcode == Instruction::And || Opcode == Instruction::Or)) { FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, Opcode, @@ -2600,9 +2599,11 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, void SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, false, getCurSDLoc(), false, false).second; + None, CallOptions, getCurSDLoc()).second; // On PS4, the "return address" must still be within the calling function, // even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. @@ -2618,24 +2619,18 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB) { SDLoc dl = getCurSDLoc(); - // Subtract the minimum value + // Subtract the minimum value. SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); - SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, - DAG.getConstant(B.First, dl, VT)); - - // Check range - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDValue RangeCmp = DAG.getSetCC( - dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), - Sub.getValueType()), - Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + SDValue RangeSub = + DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT)); // Determine the type of the test operands. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); bool UsePtrType = false; - if (!TLI.isTypeLegal(VT)) + if (!TLI.isTypeLegal(VT)) { UsePtrType = true; - else { + } else { for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { // Switch table case range are encoded into series of masks. @@ -2644,6 +2639,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, break; } } + SDValue Sub = RangeSub; if (UsePtrType) { VT = TLI.getPointerTy(DAG.getDataLayout()); Sub = DAG.getZExtOrTrunc(Sub, dl, VT); @@ -2655,20 +2651,29 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + if (!B.OmitRangeCheck) + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); - SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, - MVT::Other, CopyTo, RangeCmp, - DAG.getBasicBlock(B.Default)); + SDValue Root = CopyTo; + if (!B.OmitRangeCheck) { + // Conditional branch to the default block. 
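// --- Sketch (editor's illustration, not part of this commit): the trick
// behind the bit-test range check reshuffled above. Subtracting the
// smallest case value lets one unsigned compare (the SETUGT to the default
// block) cover both bounds, since any value below B.First wraps around to
// a huge unsigned number; when the default destination is unreachable,
// OmitRangeCheck drops the compare and branch entirely:
static bool inRange(unsigned X, unsigned First, unsigned Range) {
  return X - First <= Range; // one unsigned compare; X < First wraps high
}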
+ SDValue RangeCmp = DAG.getSetCC(dl, + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + RangeSub.getValueType()), + RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()), + ISD::SETUGT); + + Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp, + DAG.getBasicBlock(B.Default)); + } // Avoid emitting unnecessary branches to the next block. if (MBB != NextBlock(SwitchBB)) - BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, - DAG.getBasicBlock(MBB)); + Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB)); - DAG.setRoot(BrRange); + DAG.setRoot(Root); } /// visitBitTestCase - this function produces one "bit test" @@ -3266,8 +3271,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { // We care about the legality of the operation after it has been type // legalized. - while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && - VT != TLI.getTypeToTransformTo(Ctx, VT)) + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal) VT = TLI.getTypeToTransformTo(Ctx, VT); // If the vselect is legal, assume we want to leave this as a vector setcc + @@ -3534,17 +3538,32 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) { void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); + Constant *MaskV = cast<Constant>(I.getOperand(2)); SDLoc DL = getCurSDLoc(); - - SmallVector<int, 8> Mask; - ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); - unsigned MaskNumElts = Mask.size(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); + if (MaskV->isNullValue() && VT.isScalableVector()) { + // Canonical splat form of first element of first input vector. + SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, + SrcVT.getScalarType(), Src1, + DAG.getConstant(0, DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt)); + return; + } + + // For now, we only handle splats for scalable vectors. + // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation + // for targets that support a SPLAT_VECTOR for non-scalable vector types. + assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle"); + + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(MaskV, Mask); + unsigned MaskNumElts = Mask.size(); + if (SrcNumElts == MaskNumElts) { setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask)); return; @@ -3825,7 +3844,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. unsigned VectorWidth = I.getType()->isVectorTy() ? - cast<VectorType>(I.getType())->getVectorNumElements() : 0; + I.getType()->getVectorNumElements() : 0; if (VectorWidth && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); @@ -3858,12 +3877,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a scalar constant or a splat vector of constants, // handle it quickly. 
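// --- Sketch (editor's illustration, not part of this commit): the pattern
// canonicalised in visitShuffleVector above. A shufflevector with an
// all-zero mask broadcasts lane 0 of its first operand; scalable vectors
// have no fixed-length BUILD_VECTOR, so the builder emits
// EXTRACT_VECTOR_ELT(Src1, 0) followed by SPLAT_VECTOR instead.
// Fixed-length C++ analogue, names illustrative:
static void splatFirstLane(const float *Src, float *Dst, unsigned N) {
  float Elt = Src[0];          // extractelement %src, i64 0
  for (unsigned I = 0; I != N; ++I)
    Dst[I] = Elt;              // splat_vector
}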
- const auto *CI = dyn_cast<ConstantInt>(Idx); - if (!CI && isa<ConstantDataVector>(Idx) && - cast<ConstantDataVector>(Idx)->getSplatValue()) - CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue()); + const auto *C = dyn_cast<Constant>(Idx); + if (C && isa<VectorType>(C->getType())) + C = C->getSplatValue(); - if (CI) { + if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) { if (CI->isZero()) continue; APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize); @@ -3872,7 +3890,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) : DAG.getConstant(Offs, dl, IdxTy); - // In an inbouds GEP with an offset that is nonnegative even when + // In an inbounds GEP with an offset that is nonnegative even when // interpreted as signed, assume there is no unsigned overflow. SDNodeFlags Flags; if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) @@ -4002,8 +4020,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); bool isVolatile = I.isVolatile(); - bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; - bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; + bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal); + bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load); bool isDereferenceable = isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout()); unsigned Alignment = I.getAlignment(); @@ -4118,7 +4136,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) { SDValue Src = getValue(SrcV); // Create a virtual register, then update the virtual register. - unsigned VReg = + Register VReg = SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand()); // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue // Chain can be getRoot or getControlRoot. @@ -4132,8 +4150,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { "call visitLoadFromSwiftError when backend supports swifterror"); assert(!I.isVolatile() && - I.getMetadata(LLVMContext::MD_nontemporal) == nullptr && - I.getMetadata(LLVMContext::MD_invariant_load) == nullptr && + !I.hasMetadata(LLVMContext::MD_nontemporal) && + !I.hasMetadata(LLVMContext::MD_invariant_load) && "Support volatile, non temporal, invariant for load_from_swift_error"); const Value *SV = I.getOperand(0); @@ -4209,7 +4227,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { auto MMOFlags = MachineMemOperand::MONone; if (I.isVolatile()) MMOFlags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr) + if (I.hasMetadata(LLVMContext::MD_nontemporal)) MMOFlags |= MachineMemOperand::MONonTemporal; MMOFlags |= TLI.getMMOFlags(I); @@ -4309,8 +4327,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // are looking for. If first operand of the GEP is a splat vector - we // extract the splat value and use it as a uniform base. // In all other cases the function returns 'false'. 
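// --- Sketch (editor's illustration, not part of this commit): the address
// form getUniformBase is factoring out of a vector GEP. A gather/scatter
// lane address of the shape Base + Index[i] * Scale needs one scalar Base,
// so the splat base pointer (and, with this change, splat-constant middle
// indices) must be peeled off the GEP. Scalar analogue, names illustrative:
static void lanePointers(char *Base, const long *Index, long Scale,
                         void **Out, unsigned NumLanes) {
  for (unsigned I = 0; I != NumLanes; ++I)
    Out[I] = Base + Index[I] * Scale; // Base uniform, Index per lane
}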
-static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, - SDValue &Scale, SelectionDAGBuilder* SDB) { +static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index, + ISD::MemIndexType &IndexType, SDValue &Scale, + SelectionDAGBuilder *SDB) { SelectionDAG& DAG = SDB->DAG; LLVMContext &Context = *DAG.getContext(); @@ -4330,8 +4349,13 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, // Ensure all the other indices are 0. for (unsigned i = 1; i < FinalIndex; ++i) { - auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i)); - if (!C || !C->isZero()) + auto *C = dyn_cast<Constant>(GEP->getOperand(i)); + if (!C) + return false; + if (isa<VectorType>(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null<ConstantInt>(C); + if (!CI || !CI->isZero()) return false; } @@ -4346,6 +4370,7 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); Base = SDB->getValue(Ptr); Index = SDB->getValue(IndexVal); + IndexType = ISD::SIGNED_SCALED; if (!Index.getValueType().isVector()) { unsigned GEPWidth = GEP->getType()->getVectorNumElements(); @@ -4373,9 +4398,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; MachineMemOperand *MMO = DAG.getMachineFunction(). @@ -4385,11 +4412,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale }; SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); DAG.setRoot(Scatter); setValue(&I, Scatter); } @@ -4476,9 +4504,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Root = DAG.getRoot(); SDValue Base; SDValue Index; + ISD::MemIndexType IndexType; SDValue Scale; const Value *BasePtr = Ptr; - bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this); + bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale, + this); bool ConstantMemory = false; if (UniformBase && AA && AA->pointsToConstantMemory( @@ -4500,11 +4530,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); + IndexType = ISD::SIGNED_SCALED; Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout())); } SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale }; SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, - Ops, MMO); + Ops, MMO, IndexType); SDValue OutChain = Gather.getValue(1); if (!ConstantMemory) @@ -4628,7 +4659,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { auto Flags = MachineMemOperand::MOLoad; if (I.isVolatile()) Flags |= MachineMemOperand::MOVolatile; - if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr) + if (I.hasMetadata(LLVMContext::MD_invariant_load)) Flags |= MachineMemOperand::MOInvariant; if 
(isDereferenceablePointer(I.getPointerOperand(), I.getType(), DAG.getDataLayout())) @@ -4645,9 +4676,27 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { AAMDNodes(), nullptr, SSID, Order); InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, - getValue(I.getPointerOperand()), MMO); + + SDValue Ptr = getValue(I.getPointerOperand()); + + if (TLI.lowerAtomicLoadAsLoadSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for loads to prevent future divergence. + SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO); + if (MemVT != VT) + L = DAG.getPtrExtOrTrunc(L, dl, VT); + + setValue(&I, L); + SDValue OutChain = L.getValue(1); + if (!I.isUnordered()) + DAG.setRoot(OutChain); + else + PendingLoads.push_back(OutChain); + return; + } + + SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, + Ptr, MMO); SDValue OutChain = L.getValue(1); if (MemVT != VT) @@ -4686,9 +4735,17 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { SDValue Val = getValue(I.getValueOperand()); if (Val.getValueType() != MemVT) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); + SDValue Ptr = getValue(I.getPointerOperand()); + if (TLI.lowerAtomicStoreAsStoreSDNode(I)) { + // TODO: Once this is better exercised by tests, it should be merged with + // the normal path for stores to prevent future divergence. + SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO); + DAG.setRoot(S); + return; + } SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, - getValue(I.getPointerOperand()), Val, MMO); + Ptr, Val, MMO); DAG.setRoot(OutChain); @@ -4731,8 +4788,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { - SDValue Op = getValue(I.getArgOperand(i)); - Ops.push_back(Op); + const Value *Arg = I.getArgOperand(i); + if (!I.paramHasAttr(i, Attribute::ImmArg)) { + Ops.push_back(getValue(Arg)); + continue; + } + + // Use TargetConstant instead of a regular constant for immarg. + EVT VT = TLI.getValueType(*DL, Arg->getType(), true); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) { + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT)); + } else { + Ops.push_back( + DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT)); + } } SmallVector<EVT, 4> ValueVTs; @@ -4749,10 +4820,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // This is target intrinsic that touches memory AAMDNodes AAInfo; I.getAAMetadata(AAInfo); - Result = - DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, AAInfo); + Result = DAG.getMemIntrinsicNode( + Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align ? 
Info.align->value() : 0, Info.flags, Info.size, AAInfo); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -4918,12 +4989,11 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, // Put the exponent in the right bit position for later addition to the // final result: // - // #define LOG2OFe 1.4426950f - // t0 = Op * LOG2OFe + // t0 = Op * log2(e) // TODO: What fast-math-flags should be set here? SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, - getF32Constant(DAG, 0x3fb8aa3b, dl)); + DAG.getConstantFP(numbers::log2ef, dl, MVT::f32)); return getLimitedPrecisionExp2(t0, dl, DAG); } @@ -4941,10 +5011,11 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG, LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); - // Scale the exponent by log(2) [0.69314718f]. + // Scale the exponent by log(2). SDValue Exp = GetExponent(DAG, Op1, TLI, dl); - SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, - getF32Constant(DAG, 0x3f317218, dl)); + SDValue LogOfExponent = + DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + DAG.getConstantFP(numbers::ln2f, dl, MVT::f32)); // Get the significand and build it into a floating-point number with // exponent of 1. @@ -5311,19 +5382,32 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS, return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); } -// getUnderlyingArgReg - Find underlying register used for a truncated or -// bitcasted argument. -static unsigned getUnderlyingArgReg(const SDValue &N) { +// getUnderlyingArgRegs - Find underlying registers used for a truncated, +// bitcasted, or split argument. 
Returns a list of <Register, size in bits> +static void +getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, + const SDValue &N) { switch (N.getOpcode()) { - case ISD::CopyFromReg: - return cast<RegisterSDNode>(N.getOperand(1))->getReg(); + case ISD::CopyFromReg: { + SDValue Op = N.getOperand(1); + Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(), + Op.getValueType().getSizeInBits()); + return; + } case ISD::BITCAST: case ISD::AssertZext: case ISD::AssertSext: case ISD::TRUNCATE: - return getUnderlyingArgReg(N.getOperand(0)); + getUnderlyingArgRegs(Regs, N.getOperand(0)); + return; + case ISD::BUILD_PAIR: + case ISD::BUILD_VECTOR: + case ISD::CONCAT_VECTORS: + for (SDValue Op : N->op_values()) + getUnderlyingArgRegs(Regs, Op); + return; default: - return 0; + return; } } @@ -5412,11 +5496,16 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (FI != std::numeric_limits<int>::max()) Op = MachineOperand::CreateFI(FI); + SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes; if (!Op && N.getNode()) { - unsigned Reg = getUnderlyingArgReg(N); - if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + getUnderlyingArgRegs(ArgRegsAndSizes, N); + Register Reg; + if (ArgRegsAndSizes.size() == 1) + Reg = ArgRegsAndSizes.front().first; + + if (Reg && Reg.isVirtual()) { MachineRegisterInfo &RegInfo = MF.getRegInfo(); - unsigned PR = RegInfo.getLiveInPhysReg(Reg); + Register PR = RegInfo.getLiveInPhysReg(Reg); if (PR) Reg = PR; } @@ -5436,29 +5525,42 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } if (!Op) { + // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg + auto splitMultiRegDbgValue + = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) { + unsigned Offset = 0; + for (auto RegAndSize : SplitRegs) { + auto FragmentExpr = DIExpression::createFragmentExpression( + Expr, Offset, RegAndSize.second); + if (!FragmentExpr) + continue; + assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?"); + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, + RegAndSize.first, Variable, *FragmentExpr)); + Offset += RegAndSize.second; + } + }; + // Check if ValueMap has reg number. - DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + DenseMap<const Value *, unsigned>::const_iterator + VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, V->getType(), getABIRegCopyCC(V)); if (RFV.occupiesMultipleRegs()) { - unsigned Offset = 0; - for (auto RegAndSize : RFV.getRegsAndSizes()) { - Op = MachineOperand::CreateReg(RegAndSize.first, false); - auto FragmentExpr = DIExpression::createFragmentExpression( - Expr, Offset, RegAndSize.second); - if (!FragmentExpr) - continue; - FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare, - Op->getReg(), Variable, *FragmentExpr)); - Offset += RegAndSize.second; - } + splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; } + Op = MachineOperand::CreateReg(VMI->second, false); IsIndirect = IsDbgDeclare; + } else if (ArgRegsAndSizes.size() > 1) { + // This was split due to the calling convention, and no virtual register + // mapping exists for the value. 
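// --- Sketch (editor's illustration, not part of this commit): what the
// splitMultiRegDbgValue lambda above does. A variable split across several
// registers gets one DBG_VALUE per register, each carrying a DIExpression
// fragment at a running bit offset (an i128 in two 64-bit registers becomes
// fragments [0,64) and [64,128)). Illustrative stand-in types:
#include <cstdio>
struct RegAndSize { unsigned Reg; unsigned Bits; };
static void emitFragments(const RegAndSize *Regs, unsigned N) {
  unsigned Offset = 0;
  for (unsigned I = 0; I != N; ++I) {
    std::printf("DBG_VALUE vreg%u, fragment(%u, %u)\n", Regs[I].Reg, Offset,
                Regs[I].Bits); // one fragment per sub-register
    Offset += Regs[I].Bits;    // running offset in bits
  }
}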
+ splitMultiRegDbgValue(ArgRegsAndSizes); + return true; } } @@ -5468,8 +5570,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); IsIndirect = (Op->isReg()) ? IsIndirect : true; + if (IsIndirect) + Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref}); FuncInfo.ArgDbgValues.push_back( - BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false, *Op, Variable, Expr)); return true; @@ -5554,11 +5658,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; case Intrinsic::sponentry: setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl, - TLI.getPointerTy(DAG.getDataLayout()))); + TLI.getFrameIndexTy(DAG.getDataLayout()))); return; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, - TLI.getPointerTy(DAG.getDataLayout()), + TLI.getFrameIndexTy(DAG.getDataLayout()), getValue(I.getArgOperand(0)))); return; case Intrinsic::read_register: { @@ -5888,65 +5992,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::masked_compressstore: visitMaskedStore(I, true /* IsCompressing */); return; - case Intrinsic::x86_mmx_pslli_w: - case Intrinsic::x86_mmx_pslli_d: - case Intrinsic::x86_mmx_pslli_q: - case Intrinsic::x86_mmx_psrli_w: - case Intrinsic::x86_mmx_psrli_d: - case Intrinsic::x86_mmx_psrli_q: - case Intrinsic::x86_mmx_psrai_w: - case Intrinsic::x86_mmx_psrai_d: { - SDValue ShAmt = getValue(I.getArgOperand(1)); - if (isa<ConstantSDNode>(ShAmt)) { - visitTargetIntrinsic(I, Intrinsic); - return; - } - unsigned NewIntrinsic = 0; - EVT ShAmtVT = MVT::v2i32; - switch (Intrinsic) { - case Intrinsic::x86_mmx_pslli_w: - NewIntrinsic = Intrinsic::x86_mmx_psll_w; - break; - case Intrinsic::x86_mmx_pslli_d: - NewIntrinsic = Intrinsic::x86_mmx_psll_d; - break; - case Intrinsic::x86_mmx_pslli_q: - NewIntrinsic = Intrinsic::x86_mmx_psll_q; - break; - case Intrinsic::x86_mmx_psrli_w: - NewIntrinsic = Intrinsic::x86_mmx_psrl_w; - break; - case Intrinsic::x86_mmx_psrli_d: - NewIntrinsic = Intrinsic::x86_mmx_psrl_d; - break; - case Intrinsic::x86_mmx_psrli_q: - NewIntrinsic = Intrinsic::x86_mmx_psrl_q; - break; - case Intrinsic::x86_mmx_psrai_w: - NewIntrinsic = Intrinsic::x86_mmx_psra_w; - break; - case Intrinsic::x86_mmx_psrai_d: - NewIntrinsic = Intrinsic::x86_mmx_psra_d; - break; - default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. - } - - // The vector shift intrinsics with scalars uses 32b shift amounts but - // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits - // to be zero. - // We must do this early because v2i32 is not a legal type. 
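// --- Sketch (editor's illustration, not part of this commit): the identity
// behind expandExp's rewrite above. exp(x) = 2^(x * log2(e)), so the
// limited-precision path only needs an exp2 polynomial once the input is
// scaled, which is why the opaque 0x3fb8aa3b immediate became the named
// constant numbers::log2ef:
#include <cmath>
#include <cstdio>
int main() {
  const float Log2e = 1.44269504f; // numbers::log2ef
  float X = 0.7f;
  std::printf("%f %f\n", std::exp(X), std::exp2(X * Log2e)); // ~equal
}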
- SDValue ShOps[2]; - ShOps[0] = ShAmt; - ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); - ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps); - EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); - ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); - Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, - DAG.getConstant(NewIntrinsic, sdl, MVT::i32), - getValue(I.getArgOperand(0)), ShAmt); - setValue(&I, Res); - return; - } case Intrinsic::powi: setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); @@ -6063,6 +6108,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_fdiv: case Intrinsic::experimental_constrained_frem: case Intrinsic::experimental_constrained_fma: + case Intrinsic::experimental_constrained_fptosi: + case Intrinsic::experimental_constrained_fptoui: case Intrinsic::experimental_constrained_fptrunc: case Intrinsic::experimental_constrained_fpext: case Intrinsic::experimental_constrained_sqrt: @@ -6075,12 +6122,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::experimental_constrained_log: case Intrinsic::experimental_constrained_log10: case Intrinsic::experimental_constrained_log2: + case Intrinsic::experimental_constrained_lrint: + case Intrinsic::experimental_constrained_llrint: case Intrinsic::experimental_constrained_rint: case Intrinsic::experimental_constrained_nearbyint: case Intrinsic::experimental_constrained_maxnum: case Intrinsic::experimental_constrained_minnum: case Intrinsic::experimental_constrained_ceil: case Intrinsic::experimental_constrained_floor: + case Intrinsic::experimental_constrained_lround: + case Intrinsic::experimental_constrained_llround: case Intrinsic::experimental_constrained_round: case Intrinsic::experimental_constrained_trunc: visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I)); @@ -6272,6 +6323,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, Op3)); return; } + case Intrinsic::umul_fix_sat: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2, + Op3)); + return; + } case Intrinsic::stacksave: { SDValue Op = getRoot(); Res = DAG.getNode( @@ -6347,29 +6406,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.setRoot(Res); return; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); - - assert(CI && "Non-constant type in __builtin_object_size?"); - - SDValue Arg = getValue(I.getCalledValue()); - EVT Ty = Arg.getValueType(); - - if (CI->isZero()) - Res = DAG.getConstant(-1ULL, sdl, Ty); - else - Res = DAG.getConstant(0, sdl, Ty); - - setValue(&I, Res); - return; - } + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. 
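// --- Sketch (editor's illustration, not part of this commit): semantics of
// the new ISD::UMULFIXSAT node wired up above. llvm.umul.fix.sat multiplies
// two unsigned fixed-point values with Scale fractional bits and saturates
// instead of wrapping. Widened reference implementation for i32 (Scale up
// to 32), names illustrative:
#include <cstdint>
static uint32_t umulFixSat32(uint32_t A, uint32_t B, unsigned Scale) {
  uint64_t Prod = ((uint64_t)A * B) >> Scale;             // exact in 64 bits
  return Prod > UINT32_MAX ? UINT32_MAX : (uint32_t)Prod; // saturate
}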
- setValue(&I, DAG.getConstant(0, sdl, MVT::i1)); - return; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); case Intrinsic::annotation: case Intrinsic::ptr_annotation: @@ -6818,6 +6859,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Val); return; } + case Intrinsic::ptrmask: { + SDValue Ptr = getValue(I.getOperand(0)); + SDValue Const = getValue(I.getOperand(1)); + + EVT DestVT = + EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + + setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr, + DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT))); + return; + } } } @@ -6845,6 +6897,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_fma: Opcode = ISD::STRICT_FMA; break; + case Intrinsic::experimental_constrained_fptosi: + Opcode = ISD::STRICT_FP_TO_SINT; + break; + case Intrinsic::experimental_constrained_fptoui: + Opcode = ISD::STRICT_FP_TO_UINT; + break; case Intrinsic::experimental_constrained_fptrunc: Opcode = ISD::STRICT_FP_ROUND; break; @@ -6881,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_log2: Opcode = ISD::STRICT_FLOG2; break; + case Intrinsic::experimental_constrained_lrint: + Opcode = ISD::STRICT_LRINT; + break; + case Intrinsic::experimental_constrained_llrint: + Opcode = ISD::STRICT_LLRINT; + break; case Intrinsic::experimental_constrained_rint: Opcode = ISD::STRICT_FRINT; break; @@ -6899,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( case Intrinsic::experimental_constrained_floor: Opcode = ISD::STRICT_FFLOOR; break; + case Intrinsic::experimental_constrained_lround: + Opcode = ISD::STRICT_LROUND; + break; + case Intrinsic::experimental_constrained_llround: + Opcode = ISD::STRICT_LLROUND; + break; case Intrinsic::experimental_constrained_round: Opcode = ISD::STRICT_FROUND; break; @@ -7102,7 +7172,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, if (SwiftErrorVal && TLI.supportSwiftError()) { // Get the last element of InVals. SDValue Src = CLI.InVals.back(); - unsigned VReg = SwiftError.getOrCreateVRegDefAt( + Register VReg = SwiftError.getOrCreateVRegDefAt( CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal); SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src); DAG.setRoot(CopyNode); @@ -8021,6 +8091,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(T, SDValue()); + if (T.ConstraintType == TargetLowering::C_Immediate && + OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand)) + // We've delayed emitting a diagnostic like the "n" constraint because + // inlining could cause an integer showing up. 
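// --- Sketch (editor's illustration, not part of this commit): the operand
// class the new C_Immediate handling guards. An immediate constraint such
// as "n" must fold to an integer constant by selection time; a runtime
// value now gets the diagnostic below instead of a crash. GNU inline asm,
// x86-specific, purely illustrative:
static int rotl8(int X) {
  asm("roll %1, %0" : "+r"(X) : "n"(8)); // fine: 8 is a constant expression
  return X;                              // "n"(SomeRuntimeInt) would error
}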
+ return emitInlineAsmError( + CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an " + "integer constant expression"); + ExtraInfo.update(T); } @@ -8105,7 +8183,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory || - (OpInfo.ConstraintType == TargetLowering::C_Other && + ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect)) { unsigned ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); @@ -8119,13 +8198,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); break; - } else if ((OpInfo.ConstraintType == TargetLowering::C_Other && + } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && !OpInfo.isIndirect) || OpInfo.ConstraintType == TargetLowering::C_Register || OpInfo.ConstraintType == TargetLowering::C_RegisterClass) { // Otherwise, this outputs to a register (directly for C_Register / - // C_RegisterClass, and a target-defined fashion for C_Other). Find a - // register that we can use. + // C_RegisterClass, and a target-defined fashion for + // C_Immediate/C_Other). Find a register that we can use. if (OpInfo.AssignedRegs.Regs.empty()) { emitInlineAsmError( CS, "couldn't allocate output register for constraint '" + @@ -8205,15 +8285,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Treat indirect 'X' constraint as memory. - if (OpInfo.ConstraintType == TargetLowering::C_Other && + if ((OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) && OpInfo.isIndirect) OpInfo.ConstraintType = TargetLowering::C_Memory; - if (OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate || + OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector<SDValue> Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, Ops, DAG); if (Ops.empty()) { + if (OpInfo.ConstraintType == TargetLowering::C_Immediate) + if (isa<ConstantSDNode>(InOperandVal)) { + emitInlineAsmError(CS, "value out of range for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + emitInlineAsmError(CS, "invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'"); return; @@ -8250,7 +8339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || - OpInfo.ConstraintType == TargetLowering::C_Register) && + OpInfo.ConstraintType == TargetLowering::C_Register || + OpInfo.ConstraintType == TargetLowering::C_Immediate) && "Unknown constraint type!"); // TODO: Support this. @@ -8356,6 +8446,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Val = OpInfo.AssignedRegs.getCopyFromRegs( DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); break; + case TargetLowering::C_Immediate: case TargetLowering::C_Other: Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(), OpInfo, DAG); @@ -9018,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. 
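// --- Sketch (editor's illustration, not part of this commit): the shape of
// the unsigned -> llvm::Align migration visible in the surrounding hunks.
// Align wraps a power-of-two byte count, so "alignment 0" ceases to exist
// and Align::None() spells the minimal one-byte alignment (hence
// setOrigAlign(1) becoming setOrigAlign(Align::None())). Minimal stand-in;
// the real class lives in llvm/Support/Alignment.h:
struct AlignStandIn {
  unsigned ShiftValue;                  // stores log2 of the alignment
  explicit AlignStandIn(unsigned V)     // V must be a nonzero power of two
      : ShiftValue(__builtin_ctz(V)) {}
  unsigned value() const { return 1u << ShiftValue; }
};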
- unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL)); if (Args[i].Ty->isPointerTy()) { Flags.setPointer(); @@ -9073,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { FrameAlign = Args[i].Alignment; else FrameAlign = getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Args[i].IsNest) Flags.setNest(); @@ -9129,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (j == NumParts - 1) MyFlags.Flags.setSplitEnd(); } @@ -9259,7 +9350,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { assert((Op.getOpcode() != ISD::CopyFromReg || cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); - assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); + assert(!Register::isPhysicalRegister(Reg) && "Is a physreg"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // If this is an InlineAsm we have to match the registers required, not the @@ -9516,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Certain targets (such as MIPS), may have a different ABI alignment // for a type depending on the context. Give the target a chance to // specify the alignment it wants. - unsigned OriginalAlignment = - TLI->getABIAlignmentForCallingConv(ArgTy, DL); + const Align OriginalAlignment( + TLI->getABIAlignmentForCallingConv(ArgTy, DL)); if (Arg.getType()->isPointerTy()) { Flags.setPointer(); @@ -9577,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FrameAlign = Arg.getParamAlignment(); else FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); - Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Arg.hasAttribute(Attribute::Nest)) Flags.setNest(); @@ -9586,6 +9677,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { Flags.setOrigAlign(OriginalAlignment); if (ArgCopyElisionCandidates.count(&Arg)) Flags.setCopyElisionCandidate(); + if (Arg.hasAttribute(Attribute::Returned)) + Flags.setReturned(); MVT RegisterVT = TLI->getRegisterTypeForCallingConv( *CurDAG->getContext(), F.getCallingConv(), VT); @@ -9598,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 else if (i > 0) { - MyFlags.Flags.setOrigAlign(1); + MyFlags.Flags.setOrigAlign(Align::None()); if (i == NumRegs - 1) MyFlags.Flags.setSplitEnd(); } @@ -9650,7 +9743,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); - unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); + Register SRetReg = + RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); @@ -9748,10 +9842,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); } + // Analyses past this point are naive and don't expect an assertion. + if (Res.getOpcode() == ISD::AssertZext) + Res = Res.getOperand(0); + // Update the SwiftErrorVRegDefMap. 
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) { unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (Register::isVirtualRegister(Reg)) SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(), Reg); } @@ -9763,7 +9861,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // FIXME: This isn't very clean... it would be nice to make this more // general. unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (Register::isVirtualRegister(Reg)) { FuncInfo->ValueMap[&Arg] = Reg; continue; } @@ -10087,8 +10185,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, break; } case CC_BitTests: { - // FIXME: If Fallthrough is unreachable, skip the range check. - // FIXME: Optimize away range check based on pivot comparisons. BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex]; @@ -10109,6 +10205,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->DefaultProb -= DefaultProb / 2; } + if (FallthroughUnreachable) { + // Skip the range check if the fallthrough block is unreachable. + BTB->OmitRangeCheck = true; + } + // If we're in the right place, emit the bit test header right now. if (CurMBB == SwitchMBB) { visitBitTestHeader(*BTB, SwitchMBB); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 0072e33f23b7..bfcf30b430b6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -426,7 +426,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol) : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), - SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), + SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} void init(GCFunctionInfo *gfi, AliasAnalysis *AA, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index da3049881d31..bc10f7621239 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -280,6 +280,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::SPLAT_VECTOR: return "splat_vector"; case ISD::CARRY_FALSE: return "carry_false"; case ISD::ADDC: return "addc"; case ISD::ADDE: return "adde"; @@ -305,6 +306,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::SMULFIX: return "smulfix"; case ISD::SMULFIXSAT: return "smulfixsat"; case ISD::UMULFIX: return "umulfix"; + case ISD::UMULFIXSAT: return "umulfixsat"; // Conversion operators. 
case ISD::SIGN_EXTEND: return "sign_extend"; @@ -318,22 +320,27 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_ROUND: return "fp_round"; case ISD::STRICT_FP_ROUND: return "strict_fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; - case ISD::FP_ROUND_INREG: return "fp_round_inreg"; case ISD::FP_EXTEND: return "fp_extend"; case ISD::STRICT_FP_EXTEND: return "strict_fp_extend"; case ISD::SINT_TO_FP: return "sint_to_fp"; case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint"; case ISD::BITCAST: return "bitcast"; case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP: return "fp16_to_fp"; case ISD::FP_TO_FP16: return "fp_to_fp16"; case ISD::LROUND: return "lround"; + case ISD::STRICT_LROUND: return "strict_lround"; case ISD::LLROUND: return "llround"; + case ISD::STRICT_LLROUND: return "strict_llround"; case ISD::LRINT: return "lrint"; + case ISD::STRICT_LRINT: return "strict_lrint"; case ISD::LLRINT: return "llrint"; + case ISD::STRICT_LLRINT: return "strict_llrint"; // Control flow instructions case ISD::BR: return "br"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index bdf9f2c166e1..1f07a241a824 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/LegacyDivergenceAnalysis.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -434,9 +435,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { TII = MF->getSubtarget().getInstrInfo(); TLI = MF->getSubtarget().getTargetLowering(); RegInfo = &MF->getRegInfo(); - LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; - ORE = make_unique<OptimizationRemarkEmitter>(&Fn); + ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); @@ -524,8 +525,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -572,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { bool hasFI = MI->getOperand(0).isFI(); Register Reg = hasFI ? 
TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); @@ -582,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { Def->getParent()->insert(std::next(InsertPos), MI); } else LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg" - << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); + << Register::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. @@ -671,8 +671,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (TargetRegisterInfo::isVirtualRegister(From) && - TargetRegisterInfo::isVirtualRegister(To)) + if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -760,7 +759,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { continue; unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + if (!Register::isVirtualRegister(DestReg)) continue; // Ignore non-integer values. @@ -1652,9 +1651,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) { // Make sure that the copy dest is not a vreg when the copy source is a // physical register. - if (!OPI2->isReg() || - (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && - TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && + Register::isPhysicalRegister(OPI2->getReg()))) return false; return true; @@ -2234,9 +2232,9 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - unsigned Reg = + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyFromReg( Op->getOperand(0), dl, Reg, Op->getValueType(0)); New->setNodeId(-1); @@ -2248,9 +2246,9 @@ void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { SDLoc dl(Op); MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); - unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), + Register Reg = TLI->getRegisterByName(RegStr->getString().data(), Op->getOperand(2).getValueType(), - *CurDAG); + CurDAG->getMachineFunction()); SDValue New = CurDAG->getCopyToReg( Op->getOperand(0), dl, Reg, Op->getOperand(2)); New->setNodeId(-1); @@ -3323,10 +3321,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; } - case OPC_EmitCopyToReg: { + case OPC_EmitCopyToReg: + case OPC_EmitCopyToReg2: { unsigned RecNo = MatcherTable[MatcherIndex++]; assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + if (Opcode == OPC_EmitCopyToReg2) + DestPhysReg |= MatcherTable[MatcherIndex++] << 8; if (!InputChain.getNode()) InputChain = CurDAG->getEntryNode(); diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 395e9a8a4fc5..fad98b6f50dc 100644 --- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ 
b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -378,7 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, // We use TargetFrameIndex so that isel will not select it into LEA Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy()); -#ifndef NDEBUG // Right now we always allocate spill slots that are of the same // size as the value we're about to spill (the size of spillee can // vary since we spill vectors of pointers too). At some point we @@ -387,12 +386,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo(); assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() && "Bad spill: stack slot does not match!"); -#endif + // Note: Using the alignment of the spill slot (rather than the abi or + // preferred alignment) is required for correctness when dealing with spill + // slots with preferred alignments larger than frame alignment.. auto &MF = Builder.DAG.getMachineFunction(); auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *StoreMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, - PtrInfo); + StoreMMO); MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc)); @@ -1011,20 +1016,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { return; } - SDValue SpillSlot = - DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy()); + unsigned Index = *DerivedPtrLocation; + SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); // Note: We know all of these reloads are independent, but don't bother to // exploit that chain wise. DAGCombine will happily do so as needed, so // doing it here would be a small compile time win at most. SDValue Chain = getRoot(); - SDValue SpillLoad = - DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), - Relocate.getType()), - getCurSDLoc(), Chain, SpillSlot, - MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), - *DerivedPtrLocation)); + auto &MF = DAG.getMachineFunction(); + auto &MFI = MF.getFrameInfo(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index); + auto *LoadMMO = + MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad, + MFI.getObjectSize(Index), + MFI.getObjectAlignment(Index)); + + auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + Relocate.getType()); + + SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, + SpillSlot, LoadMMO); DAG.setRoot(SpillLoad.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b260cd91d468..9ab1324533f1 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetLowering.h" -#include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -37,7 +36,7 @@ using namespace llvm; /// NOTE: The TargetMachine owns TLOF. 
TargetLowering::TargetLowering(const TargetMachine &tm) - : TargetLoweringBase(tm) {} + : TargetLoweringBase(tm) {} const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { return nullptr; @@ -80,7 +79,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, const CCValAssign &ArgLoc = ArgLocs[I]; if (!ArgLoc.isRegLoc()) continue; - unsigned Reg = ArgLoc.getLocReg(); + Register Reg = ArgLoc.getLocReg(); // Only look at callee saved registers. if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg)) continue; @@ -121,19 +120,25 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, /// result of type RetVT. std::pair<SDValue, SDValue> TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, - ArrayRef<SDValue> Ops, bool isSigned, - const SDLoc &dl, bool doesNotReturn, - bool isReturnValueUsed, - bool isPostTypeLegalization) const { + ArrayRef<SDValue> Ops, + MakeLibCallOptions CallOptions, + const SDLoc &dl) const { TargetLowering::ArgListTy Args; Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (SDValue Op : Ops) { - Entry.Node = Op; + for (unsigned i = 0; i < Ops.size(); ++i) { + SDValue NewOp = Ops[i]; + Entry.Node = NewOp; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); - Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), + CallOptions.IsSExt); + Entry.IsZExt = !Entry.IsSExt; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) { + Entry.IsSExt = Entry.IsZExt = false; + } Args.push_back(Entry); } @@ -144,15 +149,22 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); - bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); + bool zeroExtend = !signExtend; + + if (CallOptions.IsSoften && + !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) { + signExtend = zeroExtend = false; + } + CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) - .setNoReturn(doesNotReturn) - .setDiscardResult(!isReturnValueUsed) - .setIsPostTypeLegalization(isPostTypeLegalization) + .setNoReturn(CallOptions.DoesNotReturn) + .setDiscardResult(!CallOptions.IsReturnValueUsed) + .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) .setSExtResult(signExtend) - .setZExtResult(!signExtend); + .setZExtResult(zeroExtend); return LowerCallTo(CLI); } @@ -263,7 +275,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &dl) const { + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS) const { assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -365,8 +378,11 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. 
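// --- Sketch (editor's illustration, not part of this commit): the refactor
// pattern applied to makeLibCall above -- a growing list of positional bool
// parameters is replaced by a MakeLibCallOptions struct with chained
// setters, so call sites name what they set. Stand-alone analogue,
// illustrative names:
struct CallOpts {
  bool IsSExt = false, DoesNotReturn = false, DiscardResult = false;
  CallOpts &setSExt(bool V = true) { IsSExt = V; return *this; }
  CallOpts &setNoReturn(bool V = true) { DoesNotReturn = V; return *this; }
  CallOpts &setDiscardResult(bool V = true) { DiscardResult = V; return *this; }
};
// A call site then reads: makeCall(..., CallOpts().setSExt().setNoReturn());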
EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = {NewLHS, NewRHS}; - NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, - dl).first; + TargetLowering::MakeLibCallOptions CallOptions; + EVT OpsVT[2] = { OldLHS.getValueType(), + OldRHS.getValueType() }; + CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true); + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); CCCode = getCmpLibcallCC(LC1); @@ -378,8 +394,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, - dl).first; + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first; NewLHS = DAG.getNode( ISD::SETCC, dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), @@ -564,6 +579,170 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, AssumeSingleUse); } +// TODO: Can we merge SelectionDAG::GetDemandedBits into this? +// TODO: Under what circumstances can we create nodes? Constant folding? +SDValue TargetLowering::SimplifyMultipleUseDemandedBits( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + // Limit search depth. + if (Depth >= SelectionDAG::MaxRecursionDepth) + return SDValue(); + + // Ignore UNDEFs. + if (Op.isUndef()) + return SDValue(); + + // Not demanding any bits/elts from Op. + if (DemandedBits == 0 || DemandedElts == 0) + return DAG.getUNDEF(Op.getValueType()); + + unsigned NumElts = DemandedElts.getBitWidth(); + KnownBits LHSKnown, RHSKnown; + switch (Op.getOpcode()) { + case ISD::BITCAST: { + SDValue Src = peekThroughBitcasts(Op.getOperand(0)); + EVT SrcVT = Src.getValueType(); + EVT DstVT = Op.getValueType(); + unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); + unsigned NumDstEltBits = DstVT.getScalarSizeInBits(); + + if (NumSrcEltBits == NumDstEltBits) + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedBits, DemandedElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + + // TODO - bigendian once we have test coverage. + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumDstEltBits / NumSrcEltBits; + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != Scale; ++i) { + unsigned Offset = i * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + if (!Sub.isNullValue()) { + DemandedSrcBits |= Sub; + for (unsigned j = 0; j != NumElts; ++j) + if (DemandedElts[j]) + DemandedSrcElts.setBit((j * Scale) + i); + } + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + // TODO - bigendian once we have test coverage. + if ((NumSrcEltBits % NumDstEltBits) == 0 && + DAG.getDataLayout().isLittleEndian()) { + unsigned Scale = NumSrcEltBits / NumDstEltBits; + unsigned NumSrcElts = SrcVT.isVector() ? 
SrcVT.getVectorNumElements() : 1; + APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) { + unsigned Offset = (i % Scale) * NumDstEltBits; + DemandedSrcBits.insertBits(DemandedBits, Offset); + DemandedSrcElts.setBit(i / Scale); + } + + if (SDValue V = SimplifyMultipleUseDemandedBits( + Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1)) + return DAG.getBitcast(DstVT, V); + } + + break; + } + case ISD::AND: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known 1 on one side, return the other. + // These bits cannot contribute to the result of the 'and' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One)) + return Op.getOperand(1); + break; + } + case ISD::OR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. These bits cannot contribute to the result of the 'or' in this + // context. + if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::XOR: { + LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + + // If all of the demanded bits are known zero on one side, return the + // other. + if (DemandedBits.isSubsetOf(RHSKnown.Zero)) + return Op.getOperand(0); + if (DemandedBits.isSubsetOf(LHSKnown.Zero)) + return Op.getOperand(1); + break; + } + case ISD::SIGN_EXTEND_INREG: { + // If none of the extended bits are demanded, eliminate the sextinreg. + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits()) + return Op.getOperand(0); + break; + } + case ISD::INSERT_VECTOR_ELT: { + // If we don't demand the inserted element, return the base vector. + SDValue Vec = Op.getOperand(0); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + EVT VecVT = Vec.getValueType(); + if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && + !DemandedElts[CIdx->getZExtValue()]) + return Vec; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); + + // If all the demanded elts are from one operand and are inline, + // then we can use the operand directly. + bool AllUndef = true, IdentityLHS = true, IdentityRHS = true; + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + AllUndef = false; + IdentityLHS &= (M == (int)i); + IdentityRHS &= ((M - NumElts) == i); + } + + if (AllUndef) + return DAG.getUNDEF(Op.getValueType()); + if (IdentityLHS) + return Op.getOperand(0); + if (IdentityRHS) + return Op.getOperand(1); + break; + } + default: + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode( + Op, DemandedBits, DemandedElts, DAG, Depth)) + return V; + break; + } + return SDValue(); +} + /// Look at Op. 
At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -619,12 +798,15 @@ bool TargetLowering::SimplifyDemandedBits( } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); - } else if (Depth == 6) { // Limit search depth. + } else if (Depth >= SelectionDAG::MaxRecursionDepth) { + // Limit search depth. return false; } KnownBits Known2, KnownOut; switch (Op.getOpcode()) { + case ISD::TargetConstant: + llvm_unreachable("Can't simplify this node"); case ISD::SCALAR_TO_VECTOR: { if (!DemandedElts[0]) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -728,6 +910,21 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::EXTRACT_SUBVECTOR: { + // If index isn't constant, assume we need all the source vector elements. + SDValue Src = Op.getOperand(0); + ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + APInt SrcElts = APInt::getAllOnesValue(NumSrcElts); + if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) { + // Offset the demanded elts by the subvector index. + uint64_t Idx = SubIdx->getZExtValue(); + SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + } + if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1)) + return true; + break; + } case ISD::CONCAT_VECTORS: { Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -773,22 +970,37 @@ bool TargetLowering::SimplifyDemandedBits( } if (!!DemandedLHS || !!DemandedRHS) { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + Known.Zero.setAllBits(); Known.One.setAllBits(); if (!!DemandedLHS) { - if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } if (!!DemandedRHS) { - if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS, - Known2, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO, + Depth + 1)) return true; Known.One &= Known2.One; Known.Zero &= Known2.Zero; } + + // Attempt to avoid multi-use ops if we don't need anything from them. + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask); + return TLO.CombineTo(Op, NewOp); + } } break; } @@ -834,6 +1046,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? 
DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known one on one side, return the other. // These bits cannot contribute to the result of the 'and'. if (DemandedBits.isSubsetOf(Known2.Zero | Known.One)) @@ -869,6 +1095,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'or'. if (DemandedBits.isSubsetOf(Known2.One | Known.Zero)) @@ -901,6 +1141,20 @@ bool TargetLowering::SimplifyDemandedBits( return true; assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // If all of the demanded bits are known zero on one side, return the other. // These bits cannot contribute to the result of the 'xor'. if (DemandedBits.isSubsetOf(Known.Zero)) @@ -1034,7 +1288,7 @@ bool TargetLowering::SimplifyDemandedBits( // out) are never demanded. // TODO - support non-uniform vector amounts. if (Op0.getOpcode() == ISD::SRL) { - if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { if (SA2->getAPIntValue().ult(BitWidth)) { @@ -1141,7 +1395,8 @@ bool TargetLowering::SimplifyDemandedBits( if (Op0.getOpcode() == ISD::SHL) { if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { - if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) { + if (!DemandedBits.intersects( + APInt::getHighBitsSet(BitWidth, ShAmt))) { if (SA2->getAPIntValue().ult(BitWidth)) { unsigned C1 = SA2->getZExtValue(); unsigned Opc = ISD::SRL; @@ -1479,6 +1734,11 @@ bool TargetLowering::SimplifyDemandedBits( return true; Known = Known.trunc(BitWidth); + // Attempt to avoid multi-use ops if we don't need anything from them. 
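// (Illustrative, not part of the patch: if only the low 8 bits of this
// truncate's source are demanded and the source is (or X, 0xFF00), the
// 'or' cannot affect those bits, so X can stand in for it in this one
// use even though the 'or' keeps its other users.)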
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc)); + // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. if (Src.getNode()->hasOneUse()) { @@ -1595,9 +1855,7 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. // TODO - bigendian once we have test coverage. - // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support. - if (SrcVT.isVector() && NumSrcEltBits > 1 && - (BitWidth % NumSrcEltBits) == 0 && + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); @@ -1663,6 +1921,7 @@ bool TargetLowering::SimplifyDemandedBits( // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + SDNodeFlags Flags = Op.getNode()->getFlags(); unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros(); APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, @@ -1671,7 +1930,6 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) { - SDNodeFlags Flags = Op.getNode()->getFlags(); if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. @@ -1684,6 +1942,23 @@ bool TargetLowering::SimplifyDemandedBits( return true; } + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op0 = DemandedOp0 ? DemandedOp0 : Op0; + Op1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); + return TLO.CombineTo(Op, NewOp); + } + } + // If we have a constant operand, we may be able to turn it into -1 if we // do not demand the high bits. This can make the constant smaller to // encode, allow more general folding, or match specialized instruction @@ -1694,10 +1969,8 @@ bool TargetLowering::SimplifyDemandedBits( if (C && !C->isAllOnesValue() && !C->isOne() && (C->getAPIntValue() | HighMask).isAllOnesValue()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); - // We can't guarantee that the new math op doesn't wrap, so explicitly - // clear those flags to prevent folding with a potential existing node - // that has those flags set. - SDNodeFlags Flags; + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); @@ -1837,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } // Limit search depth. 
- if (Depth >= 6) + if (Depth >= SelectionDAG::MaxRecursionDepth) return false; SDLoc DL(Op); @@ -2001,6 +2274,15 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; APInt BaseElts = DemandedElts; BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + + // If none of the base operand elements are demanded, replace it with undef. + if (!BaseElts && !Base.isUndef()) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + TLO.DAG.getUNDEF(VT), + Op.getOperand(1), + Op.getOperand(2))); + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, Depth + 1)) return true; @@ -2134,11 +2416,13 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Update legal shuffle masks based on demanded elements if it won't reduce // to Identity which can cause premature removal of the shuffle mask. - if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && - isShuffleMaskLegal(NewMask, VT)) - return TLO.CombineTo(Op, - TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), - Op.getOperand(1), NewMask)); + if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { + SDValue LegalShuffle = + buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), + NewMask, TLO.DAG); + if (LegalShuffle) + return TLO.CombineTo(Op, LegalShuffle); + } // Propagate undef/zero elements from LHS/RHS. for (unsigned i = 0; i != NumElts; ++i) { @@ -2304,6 +2588,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known.resetAll(); } +void TargetLowering::computeKnownBitsForTargetInstr( + GISelKnownBits &Analysis, Register R, KnownBits &Known, + const APInt &DemandedElts, const MachineRegisterInfo &MRI, + unsigned Depth) const { + Known.resetAll(); +} + void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, @@ -2357,6 +2648,36 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode( return false; } +SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( + SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, + SelectionDAG &DAG, unsigned Depth) const { + assert( + (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op" + " is a target node!"); + return SDValue(); +} + +SDValue +TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, + SDValue N1, MutableArrayRef<int> Mask, + SelectionDAG &DAG) const { + bool LegalMask = isShuffleMaskLegal(Mask, VT); + if (!LegalMask) { + std::swap(N0, N1); + ShuffleVectorSDNode::commuteMask(Mask); + LegalMask = isShuffleMaskLegal(Mask, VT); + } + + if (!LegalMask) + return SDValue(); + + return DAG.getVectorShuffle(VT, DL, N0, N1, Mask); +} + const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const { return nullptr; } @@ -2610,6 +2931,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( return T2; } +// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 +SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const { + assert(isConstOrConstSplat(N1C) && + isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + "Should be a comparison with 0."); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Valid only for [in]equality 
comparisons."); + + unsigned NewShiftOpcode; + SDValue X, C, Y; + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Look for '(C l>>/<< Y)'. + auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) { + // The shift should be one-use. + if (!V.hasOneUse()) + return false; + unsigned OldShiftOpcode = V.getOpcode(); + switch (OldShiftOpcode) { + case ISD::SHL: + NewShiftOpcode = ISD::SRL; + break; + case ISD::SRL: + NewShiftOpcode = ISD::SHL; + break; + default: + return false; // must be a logical shift. + } + // We should be shifting a constant. + // FIXME: best to use isConstantOrConstantVector(). + C = V.getOperand(0); + ConstantSDNode *CC = + isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + if (!CC) + return false; + Y = V.getOperand(1); + + ConstantSDNode *XC = + isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG); + }; + + // LHS of comparison should be an one-use 'and'. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + X = N0.getOperand(0); + SDValue Mask = N0.getOperand(1); + + // 'and' is commutative! + if (!Match(Mask)) { + std::swap(X, Mask); + if (!Match(Mask)) + return SDValue(); + } + + EVT VT = X.getValueType(); + + // Produce: + // ((X 'OppositeShiftOpcode' Y) & C) Cond 0 + SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y); + SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond); + return T2; +} + /// Try to fold an equality comparison with a {add/sub/xor} binary operation as /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to /// handle the commuted versions of these patterns. @@ -2726,9 +3118,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ - SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, - DAG.getConstant(1, dl, CTVT)); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); } @@ -2852,7 +3244,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); APInt bestMask; unsigned bestWidth = 0, bestOffset = 0; - if (!Lod->isVolatile() && Lod->isUnindexed()) { + if (Lod->isSimple() && Lod->isUnindexed()) { unsigned origWidth = N0.getValueSizeInBits(); unsigned maskWidth = origWidth; // We can narrow (e.g.) 16-bit extending loads on 32-bit target to @@ -3178,6 +3570,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 + if (C1.isNullValue()) + if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( + VT, N0, N1, Cond, DCI, dl)) + return CC; + } + // If we have "setcc X, C0", check to see if we can shrink the immediate // by changing cc. // TODO: Support this for vectors after legalize ops. 
@@ -3203,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Back to non-vector simplifications. // TODO: Can we do these for vector splats? if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const APInt &C1 = N1C->getAPIntValue(); + EVT ShValTy = N0.getValueType(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - (VT == N0.getValueType() || - (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, - !DCI.isBeforeLegalize()); + EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. - if (AndRHS->getAPIntValue().isPowerOf2()) { + unsigned ShCt = AndRHS->getAPIntValue().logBase2(); + if (AndRHS->getAPIntValue().isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. - if (C1.isPowerOf2()) { + unsigned ShCt = C1.logBase2(); + if (C1.isPowerOf2() && + ShCt <= TLI.getShiftAmountThreshold(ShValTy)) { return DAG.getNode(ISD::TRUNCATE, dl, VT, - DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, - DAG.getConstant(C1.logBase2(), dl, - ShiftTy))); + DAG.getNode(ISD::SRL, dl, ShValTy, N0, + DAG.getConstant(ShCt, dl, ShiftTy))); } } } @@ -3452,15 +3854,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } // Fold remainder of division by a constant. - if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && + N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. - if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) - if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) - return Folded; + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (N0.getOpcode() == ISD::UREM) { + if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } else if (N0.getOpcode() == ISD::SREM) { + if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl)) + return Folded; + } + } } // Fold away ALL boolean setcc's. 
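The UREM path above dispatches to buildUREMEqFold, whose multiply-and-rotate pattern is defined later in this patch. A standalone spot-check of that pattern, with hand-computed constants for D = 6 at 32 bits (D0 = 3, K = 1, P = inv(3, 2^32) = 0xAAAAAAAB, Q = floor((2^32 - 1) / 6)); illustrative only, not DAG code:

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t V, unsigned N) {
  return (V >> N) | (V << (32 - N)); // only called with N = 1 here
}

int main() {
  const uint32_t P = 0xAAAAAAABu;      // inv(3, 2^32)
  const uint32_t Q = 0xFFFFFFFFu / 6u; // floor((2^32 - 1) / 6)
  for (uint32_t N : {0u, 5u, 6u, 35u, 36u, 0xFFFFFFFAu})
    assert(((N % 6u) == 0u) == (rotr32(N * P, 1) <= Q));
  return 0;
}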
@@ -3567,15 +3975,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const { if (S == 1) { switch (Constraint[0]) { default: break; - case 'r': return C_RegisterClass; + case 'r': + return C_RegisterClass; case 'm': // memory case 'o': // offsetable case 'V': // not offsetable return C_Memory; - case 'i': // Simple Integer or Relocatable Constant case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant + return C_Immediate; + case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant case 'p': // Address. case 'X': // Allow ANY value. @@ -3950,6 +4360,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, /// Return an integer indicating how general CT is. static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { switch (CT) { + case TargetLowering::C_Immediate: case TargetLowering::C_Other: case TargetLowering::C_Unknown: return 0; @@ -4069,11 +4480,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - // If this is an 'other' constraint, see if the operand is valid for it. - // For example, on X86 we might have an 'rI' constraint. If the operand - // is an integer in the range [0..31] we want to use I (saving a load - // of a register), otherwise we must use 'r'. - if (CType == TargetLowering::C_Other && Op.getNode()) { + // If this is an 'other' or 'immediate' constraint, see if the operand is + // valid for it. For example, on X86 we might have an 'rI' constraint. If + // the operand is an integer in the range [0..31] we want to use I (saving a + // load of a register), otherwise we must use 'r'. + if ((CType == TargetLowering::C_Other || + CType == TargetLowering::C_Immediate) && Op.getNode()) { assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector<SDValue> ResultOps; @@ -4455,6 +4867,34 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return DAG.getSelect(dl, VT, IsOne, N0, Q); } +/// If all values in Values that *don't* match the predicate are the same +/// 'splat' value, then replace all values with that splat value. +/// Else, if AlternativeReplacement was provided, then replace all values that +/// do match predicate with AlternativeReplacement value. +static void +turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values, + std::function<bool(SDValue)> Predicate, + SDValue AlternativeReplacement = SDValue()) { + SDValue Replacement; + // Is there a value for which the Predicate does *NOT* match? What is it? + auto SplatValue = llvm::find_if_not(Values, Predicate); + if (SplatValue != Values.end()) { + // Does Values consist only of SplatValue and values matching Predicate? + if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) { + return Value == *SplatValue || Predicate(Value); + })) // Then we shall replace values matching predicate with SplatValue. + Replacement = *SplatValue; + } + if (!Replacement) { + // Oops, we did not find the "baseline" splat value. + if (!AlternativeReplacement) + return; // Nothing to do. + // Let's replace with the provided value then.
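// (Worked example, hypothetical lanes: Values = {P, 0, P} with
// Predicate = isNullConstant has the consistent splat P, so the '0' lane
// becomes P. For Values = {P, 0, R} no single splat exists, so the '0'
// lane instead takes this AlternativeReplacement, when one was given.)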
+ Replacement = AlternativeReplacement; + } + std::replace_if(Values.begin(), Values.end(), Predicate, Replacement); +} + /// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE /// where the divisor is constant and the comparison target is zero, /// return a DAG expression that will generate the same comparison result /// using only multiplications, additions and shifts/rotations. /// Ref: "Hacker's Delight" 10-17. @@ -4482,77 +4922,409 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, DAGCombinerInfo &DCI, const SDLoc &DL, SmallVectorImpl<SDNode *> &Created) const { // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q) - // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1 + // - D must be constant, with D = D0 * 2^K where D0 is odd - P is the multiplicative inverse of D0 modulo 2^W - // - Q = floor((2^W - 1) / D0) + // - Q = floor(((2^W) - 1) / D) // where W is the width of the common type of N and D. assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Only applicable for (in)equality comparisons."); + SelectionDAG &DAG = DCI.DAG; + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); // If MUL is unavailable, we cannot proceed in any case. if (!isOperationLegalOrCustom(ISD::MUL, VT)) return SDValue(); - // TODO: Add non-uniform constant support. - ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1)); + // TODO: Could support comparing with non-zero too. ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!Divisor || !CompTarget || Divisor->isNullValue() || - !CompTarget->isNullValue()) + if (!CompTarget || !CompTarget->isNullValue()) return SDValue(); - const APInt &D = Divisor->getAPIntValue(); + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector<SDValue, 16> PAmts, KAmts, QAmts; + + auto BuildUREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + const APInt &D = C->getAPIntValue(); + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + // D is even if it has trailing zeros. + HadEvenDivisor |= (K != 0); + // D is a power-of-two if D0 is one. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate. + unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // Q = floor((2^W - 1) / D) + APInt Q = APInt::getAllOnesValue(W).udiv(D); + + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. + if (D.isOneValue()) { + // Set P and K to bogus values so we can try to splat them.
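// (For such lanes any P and K would do: x urem 1 is always zero and Q is
// all-ones there, so the final unsigned compare holds regardless; the
// splat attempts below are then free to overwrite these placeholders.)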
+ P = 0; + K = -1; + assert(Q.isAllOnesValue() && + "Expecting all-ones comparison for one divisor"); + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1) - // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildUREMPattern)) + return SDValue(); - // If this is a urem by one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); - // If this is a urem by a power of two, avoid the fold since it can be + // best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); - // P = inv(D0, 2^W) - // 2^W requires W + 1 bits, so we have to extend and then truncate. - unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + SDValue PVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0's. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } - // Q = floor((2^W - 1) / D) - APInt Q = APInt::getAllOnesValue(W).udiv(D); + PVal = DAG.getBuildVector(VT, DL, PAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } - SelectionDAG &DAG = DCI.DAG; // (mul N, P) - SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal); - Created.push_back(Op1.getNode()); + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); - // Rotate right only if D was even. - if (DivisorIsEven) { + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op. + if (HadEvenDivisor) { // We need ROTR to do this. if (!isOperationLegalOrCustom(ISD::ROTR, VT)) return SDValue(); - SDValue ShAmt = - DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout())); SDNodeFlags Flags; Flags.setExact(true); // UREM: (rotr (mul N, P), K) - Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags); - Created.push_back(Op1.getNode()); + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); } // UREM: (setule/setugt (rotr (mul N, P), K), Q) - return DAG.getSetCC(DL, SETCCVT, Op1, QVal, + return DAG.getSetCC(DL, SETCCVT, Op0, QVal, ((Cond == ISD::SETEQ) ?
ISD::SETULE : ISD::SETUGT)); } +/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE +/// where the divisor is constant and the comparison target is zero, +/// return a DAG expression that will generate the same comparison result +/// using only multiplications, additions and shifts/rotations. +/// Ref: "Hacker's Delight" 10-17. +SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, + ISD::CondCode Cond, + DAGCombinerInfo &DCI, + const SDLoc &DL) const { + SmallVector<SDNode *, 7> Built; + if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond, + DCI, DL, Built)) { + assert(Built.size() <= 7 && "Max size prediction failed."); + for (SDNode *N : Built) + DCI.AddToWorklist(N); + return Folded; + } + + return SDValue(); +} + +SDValue +TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, + SDValue CompTargetNode, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL, + SmallVectorImpl<SDNode *> &Created) const { + // Fold: + // (seteq/ne (srem N, D), 0) + // To: + // (setule/ugt (rotr (add (mul N, P), A), K), Q) + // + // - D must be constant, with D = D0 * 2^K where D0 is odd + // - P is the multiplicative inverse of D0 modulo 2^W + // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K))) + // - Q = floor((2 * A) / (2^K)) + // where W is the width of the common type of N and D. + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Only applicable for (in)equality comparisons."); + + SelectionDAG &DAG = DCI.DAG; + + EVT VT = REMNode.getValueType(); + EVT SVT = VT.getScalarType(); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + EVT ShSVT = ShVT.getScalarType(); + + // If MUL is unavailable, we cannot proceed in any case. + if (!isOperationLegalOrCustom(ISD::MUL, VT)) + return SDValue(); + + // TODO: Could support comparing with non-zero too. + ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); + if (!CompTarget || !CompTarget->isNullValue()) + return SDValue(); + + bool HadIntMinDivisor = false; + bool HadOneDivisor = false; + bool AllDivisorsAreOnes = true; + bool HadEvenDivisor = false; + bool NeedToApplyOffset = false; + bool AllDivisorsArePowerOfTwo = true; + SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts; + + auto BuildSREMPattern = [&](ConstantSDNode *C) { + // Division by 0 is UB. Leave it to be constant-folded elsewhere. + if (C->isNullValue()) + return false; + + // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. + + // WARNING: this fold is only valid for positive divisors! + APInt D = C->getAPIntValue(); + if (D.isNegative()) + D.negate(); // `rem %X, -C` is equivalent to `rem %X, C` + + HadIntMinDivisor |= D.isMinSignedValue(); + + // If all divisors are ones, we will prefer to avoid the fold. + HadOneDivisor |= D.isOneValue(); + AllDivisorsAreOnes &= D.isOneValue(); + + // Decompose D into D0 * 2^K + unsigned K = D.countTrailingZeros(); + assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + APInt D0 = D.lshr(K); + + if (!D.isMinSignedValue()) { + // D is even if it has trailing zeros; unless it's INT_MIN, in which case + // we don't care about this lane in this fold; we'll special-handle it. + HadEvenDivisor |= (K != 0); + } + + // D is a power-of-two if D0 is one. This includes INT_MIN. + // If all divisors are power-of-two, we will prefer to avoid the fold. + AllDivisorsArePowerOfTwo &= D0.isOneValue(); + + // P = inv(D0, 2^W) + // 2^W requires W + 1 bits, so we have to extend and then truncate.
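// (Illustrative numbers, not from the patch: for W = 32 and D = 6 we get
// D0 = 3, K = 1, and P = inv(3, 2^32) = 0xAAAAAAAB, since
// 3 * 0xAAAAAAAB = 0x200000001 == 1 (mod 2^32).)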
+ unsigned W = D.getBitWidth(); + APInt P = D0.zext(W + 1) + .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) + .trunc(W); + assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + + // A = floor((2^(W - 1) - 1) / D0) & -2^K + APInt A = APInt::getSignedMaxValue(W).udiv(D0); + A.clearLowBits(K); + + if (!D.isMinSignedValue()) { + // If the divisor is INT_MIN, then we don't care about this lane in this + // fold; we'll special-handle it. + NeedToApplyOffset |= A != 0; + } + + // Q = floor((2 * A) / (2^K)) + APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); + + assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && + "We are expecting that A is always less than all-ones for SVT"); + assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + "We are expecting that K is always less than all-ones for ShSVT"); + + // If the divisor is 1 the result can be constant-folded. Likewise, we + // don't care about INT_MIN lanes; those can be set to undef if appropriate. + if (D.isOneValue()) { + // Set P, A and K to bogus values so we can try to splat them. + P = 0; + A = -1; + K = -1; + + // x ?% 1 == 0 <--> true <--> x u<= -1 + Q = -1; + } + + PAmts.push_back(DAG.getConstant(P, DL, SVT)); + AAmts.push_back(DAG.getConstant(A, DL, SVT)); + KAmts.push_back( + DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT)); + QAmts.push_back(DAG.getConstant(Q, DL, SVT)); + return true; + }; + + SDValue N = REMNode.getOperand(0); + SDValue D = REMNode.getOperand(1); + + // Collect the values from each element. + if (!ISD::matchUnaryPredicate(D, BuildSREMPattern)) + return SDValue(); + + // If this is a srem by one, avoid the fold since it can be constant-folded. + if (AllDivisorsAreOnes) + return SDValue(); + + // If this is a srem by a power of two (including INT_MIN), avoid the fold + // since it can be best implemented as a bit test. + if (AllDivisorsArePowerOfTwo) + return SDValue(); + + SDValue PVal, AVal, KVal, QVal; + if (VT.isVector()) { + if (HadOneDivisor) { + // Try to turn PAmts into a splat, since we don't care about the values + // that are currently '0'. If we can't, just keep '0's. + turnVectorIntoSplatVector(PAmts, isNullConstant); + // Try to turn AAmts into a splat, since we don't care about the + // values that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(AAmts, isAllOnesConstant, + DAG.getConstant(0, DL, SVT)); + // Try to turn KAmts into a splat, since we don't care about the values + // that are currently '-1'. If we can't, change them to '0's. + turnVectorIntoSplatVector(KAmts, isAllOnesConstant, + DAG.getConstant(0, DL, ShSVT)); + } + + PVal = DAG.getBuildVector(VT, DL, PAmts); + AVal = DAG.getBuildVector(VT, DL, AAmts); + KVal = DAG.getBuildVector(ShVT, DL, KAmts); + QVal = DAG.getBuildVector(VT, DL, QAmts); + } else { + PVal = PAmts[0]; + AVal = AAmts[0]; + KVal = KAmts[0]; + QVal = QAmts[0]; + } + + // (mul N, P) + SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal); + Created.push_back(Op0.getNode()); + + if (NeedToApplyOffset) { + // We need ADD to do this. + if (!isOperationLegalOrCustom(ISD::ADD, VT)) + return SDValue(); + + // (add (mul N, P), A) + Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal); + Created.push_back(Op0.getNode()); + } + + // Rotate right only if any divisor was even. We avoid rotates for all-odd + // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) { + // We need ROTR to do this. + if (!isOperationLegalOrCustom(ISD::ROTR, VT)) + return SDValue(); + SDNodeFlags Flags; + Flags.setExact(true); + // SREM: (rotr (add (mul N, P), A), K) + Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags); + Created.push_back(Op0.getNode()); + } + + // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q) + SDValue Fold = + DAG.getSetCC(DL, SETCCVT, Op0, QVal, + ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT)); + + // If we didn't have lanes with INT_MIN divisor, then we're done. + if (!HadIntMinDivisor) + return Fold; + + // That fold is only valid for positive divisors, which effectively means + // it is invalid for INT_MIN divisors. So if we have such a lane, + // we must fix up the results for said lanes. + assert(VT.isVector() && "Can/should only get here for vectors."); + + if (!isOperationLegalOrCustom(ISD::SETEQ, VT) || + !isOperationLegalOrCustom(ISD::AND, VT) || + !isOperationLegalOrCustom(Cond, VT) || + !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + Created.push_back(Fold.getNode()); + + SDValue IntMin = DAG.getConstant( + APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue IntMax = DAG.getConstant( + APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT); + SDValue Zero = + DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); + + // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. + SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); + Created.push_back(DivisorIsIntMin.getNode()); + + // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0 + SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax); + Created.push_back(Masked.getNode()); + SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond); + Created.push_back(MaskedIsZero.getNode()); + + // To produce the final result we need to blend 2 vectors: 'Fold' and + // 'MaskedIsZero'. If the divisor for a channel was *NOT* INT_MIN, we pick + // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is + // constant-folded, select can get lowered to a shuffle with constant mask. + SDValue Blended = + DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold); + + return Blended; +} + bool TargetLowering:: verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { if (!isa<ConstantSDNode>(Op.getOperand(0))) { @@ -4564,6 +5336,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } +char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) + return 2; + + // Don't allow anything with multiple uses unless we know it is free. + EVT VT = Op.getValueType(); + const SDNodeFlags Flags = Op->getFlags(); + const TargetOptions &Options = DAG.getTarget().Options; + if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND && + isFPExtFree(VT, Op.getOperand(0).getValueType()))) + return 0; + + // Don't recurse exponentially. + if (Depth > SelectionDAG::MaxRecursionDepth) + return 0; + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + if (!LegalOperations) + return 1; + + // Don't invert constant FP values after legalization unless the target says + // the negated constant is legal.
+ return isOperationLegal(ISD::ConstantFP, VT) || + isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT, + ForCodeSize); + } + case ISD::BUILD_VECTOR: { + // Only permit BUILD_VECTOR of constants. + if (llvm::any_of(Op->op_values(), [&](SDValue N) { + return !N.isUndef() && !isa<ConstantFPSDNode>(N); + })) + return 0; + if (!LegalOperations) + return 1; + if (isOperationLegal(ISD::ConstantFP, VT) && + isOperationLegal(ISD::BUILD_VECTOR, VT)) + return 1; + return llvm::all_of(Op->op_values(), [&](SDValue N) { + return N.isUndef() || + isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT, + ForCodeSize); + }); + } + case ISD::FADD: + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT)) + return 0; + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1)) + return V; + + // Ignore X * 2.0 because that is expected to be canonicalized to X + X. + if (auto *C = isConstOrConstSplatFP(Op.getOperand(1))) + if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL) + return 0; + + return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + case ISD::FMA: + case ISD::FMAD: { + if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros()) + return 0; + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (!V2) + return 0; + + // One of Op0/Op1 must be cheaply negatible, then select the cheapest. + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V01 = std::max(V0, V1); + return V01 ? std::max(V01, V2) : 0; + } + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + } + + return 0; +} + +SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, + bool ForCodeSize, + unsigned Depth) const { + // fneg is removable even if it has multiple uses. 
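// (In other words, negating an existing fneg simply peels it off:
// getNegatedExpression(fneg X) is X itself, which is why the use count of
// the fneg does not matter for this case.)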
+ if (Op.getOpcode() == ISD::FNEG) + return Op.getOperand(0); + + assert(Depth <= SelectionDAG::MaxRecursionDepth && + "getNegatedExpression doesn't match isNegatibleForFree"); + const SDNodeFlags Flags = Op->getFlags(); + + switch (Op.getOpcode()) { + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); + } + case ISD::BUILD_VECTOR: { + SmallVector<SDValue, 4> Ops; + for (SDValue C : Op->op_values()) { + if (C.isUndef()) { + Ops.push_back(C); + continue; + } + APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF(); + V.changeSign(); + Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType())); + } + return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops); + } + case ISD::FADD: + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(0), Flags); + case ISD::FSUB: + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = + isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true)) + if (N0CFP->isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0), Flags); + + case ISD::FMUL: + case ISD::FDIV: + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize, + Depth + 1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1), Flags); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode( + Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), + getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1), + Flags); + + case ISD::FMA: + case ISD::FMAD: { + assert((DAG.getTarget().Options.NoSignedZerosFPMath || + Flags.hasNoSignedZeros()) && + "Expected NSZ fp-flag"); + + SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, + ForCodeSize, Depth + 1); + char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + if (V0 >= V1) { + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + SDValue Neg0 = getNegatedExpression( + Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, + Op.getOperand(1), Neg2, Flags); + } + + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), Neg1, Neg2, Flags); + } + + case ISD::FP_EXTEND: + case ISD::FSIN: + return 
DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), + getNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, ForCodeSize, + Depth + 1), + Op.getOperand(1)); + } + + llvm_unreachable("Unknown code"); +} + //===----------------------------------------------------------------------===// // Legalization Utilities //===----------------------------------------------------------------------===// @@ -4862,7 +5874,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); SDLoc dl(SDValue(Node, 0)); @@ -4871,6 +5884,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, if (SrcVT != MVT::f32 || DstVT != MVT::i64) return false; + if (Node->isStrictFPOpcode()) + // When a NaN is converted to an integer a trap is allowed. We can't + // use this expansion here because it would eliminate that trap. Other + // traps are also allowed and cannot be eliminated. See + // IEEE 754-2008 sec 5.8. + return false; + // Expand f32 -> i64 conversion // This algorithm comes from compiler-rt's implementation of fixsfdi: // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c @@ -4924,9 +5944,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, } bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, + SDValue &Chain, SelectionDAG &DAG) const { SDLoc dl(SDValue(Node, 0)); - SDValue Src = Node->getOperand(0); + unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; + SDValue Src = Node->getOperand(OpNo); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -4934,7 +5956,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT); // Only expand vector types if we have the appropriate vector bit operations. - if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) || + unsigned SIntOpcode = Node->isStrictFPOpcode() ? 
ISD::STRICT_FP_TO_SINT : + ISD::FP_TO_SINT; + if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) return false; @@ -4946,14 +5970,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { - Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); + if (Node->isStrictFPOpcode()) { + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); + Chain = Result.getValue(1); + } else + Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); return true; } SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT); - bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + bool Strict = Node->isStrictFPOpcode() || + shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false); + if (Strict) { // Expand based on maximum range of FP_TO_SINT, if the value exceeds the // signmask then offset (the result of which should be fully representable). @@ -4963,12 +5994,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // Result = fp_to_sint(Val) ^ Ofs // TODO: Should any fast-math-flags be set for the FSUB? - SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, - DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst)); + SDValue SrcBiased; + if (Node->isStrictFPOpcode()) + SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + { Node->getOperand(0), Src, Cst }); + else + SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst); + SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased); SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), DAG.getConstant(SignMask, dl, DstVT)); - Result = DAG.getNode(ISD::XOR, dl, DstVT, - DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs); + SDValue SInt; + if (Node->isStrictFPOpcode()) { + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { SrcBiased.getValue(1), Val }); + Chain = SInt.getValue(1); + } else + SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val); + Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs); } else { // Expand based on maximum range of FP_TO_SINT: // True = fp_to_sint(Src) @@ -5918,7 +6960,8 @@ SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::UMULFIX || - Node->getOpcode() == ISD::SMULFIXSAT) && + Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT) && "Expected a fixed point multiplication opcode"); SDLoc dl(Node); @@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { SDValue RHS = Node->getOperand(1); EVT VT = LHS.getValueType(); unsigned Scale = Node->getConstantOperandVal(2); - bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT; + bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT || + Node->getOpcode() == ISD::UMULFIXSAT); + bool Signed = (Node->getOpcode() == ISD::SMULFIX || + Node->getOpcode() == ISD::SMULFIXSAT); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); unsigned VTSize = VT.getScalarSizeInBits(); if (!Scale) { // [us]mul.fix(a, b, 0) -> mul(a, b) - if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) { - return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); - } else if (Saturating && 
@@ -5918,7 +6960,8 @@ SDValue
 TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
   assert((Node->getOpcode() == ISD::SMULFIX ||
           Node->getOpcode() == ISD::UMULFIX ||
-          Node->getOpcode() == ISD::SMULFIXSAT) &&
+          Node->getOpcode() == ISD::SMULFIXSAT ||
+          Node->getOpcode() == ISD::UMULFIXSAT) &&
          "Expected a fixed point multiplication opcode");
 
   SDLoc dl(Node);
@@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
   SDValue RHS = Node->getOperand(1);
   EVT VT = LHS.getValueType();
   unsigned Scale = Node->getConstantOperandVal(2);
-  bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
+                     Node->getOpcode() == ISD::UMULFIXSAT);
+  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
+                 Node->getOpcode() == ISD::SMULFIXSAT);
   EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
   unsigned VTSize = VT.getScalarSizeInBits();
 
   if (!Scale) {
     // [us]mul.fix(a, b, 0) -> mul(a, b)
-    if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
-      return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
-    } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+    if (!Saturating) {
+      if (isOperationLegalOrCustom(ISD::MUL, VT))
+        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
       SDValue Result =
           DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
       SDValue Product = Result.getValue(0);
@@ -5948,11 +6995,18 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
       SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
       Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
       return DAG.getSelect(dl, VT, Overflow, Result, Product);
+    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
+      SDValue Result =
+          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+      SDValue Product = Result.getValue(0);
+      SDValue Overflow = Result.getValue(1);
+
+      APInt MaxVal = APInt::getMaxValue(VTSize);
+      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
     }
   }
 
-  bool Signed =
-      Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
   assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
          "Expected scale to be less than the number of bits if signed or at "
          "most the number of bits if unsigned.");
@@ -5978,7 +7032,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
 
   if (Scale == VTSize)
     // Result is just the top half since we'd be shifting by the width of the
-    // operand.
+    // operand. Overflow is impossible, so this works for both UMULFIX and
+    // UMULFIXSAT.
    return Hi;
 
   // The result will need to be shifted right by the scale since both operands
@@ -5990,20 +7045,55 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
   if (!Saturating)
     return Result;
 
-  unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
-  SDValue HiMask =
-      DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
-  SDValue LoMask = DAG.getConstant(
-      APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
-  APInt MaxVal = APInt::getSignedMaxValue(VTSize);
-  APInt MinVal = APInt::getSignedMinValue(VTSize);
-
-  Result = DAG.getSelectCC(dl, Hi, LoMask,
-                           DAG.getConstant(MaxVal, dl, VT), Result,
-                           ISD::SETGT);
-  return DAG.getSelectCC(dl, Hi, HiMask,
-                         DAG.getConstant(MinVal, dl, VT), Result,
-                         ISD::SETLT);
+  if (!Signed) {
+    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
+    // widened multiplication) aren't all zeroes.
+
+    // Saturate to max if ((Hi >> Scale) != 0),
+    // which is the same as if (Hi > ((1 << Scale) - 1))
+    APInt MaxVal = APInt::getMaxValue(VTSize);
+    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
+                                      dl, VT);
+    Result = DAG.getSelectCC(dl, Hi, LowMask,
+                             DAG.getConstant(MaxVal, dl, VT), Result,
+                             ISD::SETUGT);
+
+    return Result;
+  }
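
The unsigned saturation test above has a direct scalar analogue. A sketch for a 32-bit umul.fix.sat, assuming a 64-bit widened product in place of the Hi/Lo node pair (the function and its name are ours, not LLVM's):

  #include <cstdint>

  uint32_t umulfixsat32(uint32_t A, uint32_t B, unsigned Scale) {
    uint64_t Wide = (uint64_t)A * B;            // Hi:Lo as one 64-bit value
    uint32_t Hi = (uint32_t)(Wide >> 32);
    uint32_t Result = (uint32_t)(Wide >> Scale);
    // Same test as the SETUGT against LowMask: saturate when Hi > 2^Scale - 1.
    if (Scale < 32 && (Hi >> Scale) != 0)
      return UINT32_MAX;
    return Result;                              // Scale == 32 yields Hi; no overflow
  }
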
+  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
+  // widened multiplication) aren't all ones or all zeroes.
+
+  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
+  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
+
+  if (Scale == 0) {
+    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
+                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
+    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
+    // Saturate to SatMin if the wide product is negative, and to SatMax if
+    // the wide product is positive ...
+    SDValue Zero = DAG.getConstant(0, dl, VT);
+    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
+                                               ISD::SETLT);
+    // ... but only if we overflowed.
+    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
+  }
+
+  // We handled Scale == 0 above, so all the bits to examine are in Hi.
+
+  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
+  // which is the same as if (Hi > ((1 << (Scale - 1)) - 1))
+  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
+                                    dl, VT);
+  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
+  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
+  // which is the same as if (Hi < (-1 << (Scale - 1)))
+  SDValue HighMask =
+      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
+                      dl, VT);
+  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
+  return Result;
 }
 
 void TargetLowering::expandUADDSUBO(
@@ -6060,24 +7150,19 @@ void TargetLowering::expandSADDSUBO(
 
   SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
 
-  //   LHSSign -> LHS >= 0
-  //   RHSSign -> RHS >= 0
-  //   SumSign -> Result >= 0
-  //
-  //   Add:
-  //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
-  //   Sub:
-  //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
-  SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
-  SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
-  SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
-                                    IsAdd ? ISD::SETEQ : ISD::SETNE);
-
-  SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
-  SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
-  SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
-  Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
+  // For an addition, the result should be less than one of the operands (LHS)
+  // if and only if the other operand (RHS) is negative, otherwise there will
+  // be overflow.
+  // For a subtraction, the result should be less than one of the operands
+  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+  // otherwise there will be overflow.
+  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
+  SDValue ConditionRHS =
+      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
+
+  Overflow = DAG.getBoolExtOrTrunc(
+      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
+      ResultType, ResultType);
 }
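
The rewritten check needs two setccs and an XOR where the old code built five setccs and an AND. A scalar model of the new condition, in our own words rather than LLVM code (the unsigned casts give wrapping arithmetic; the conversion back to int32_t is two's complement on common implementations and well defined since C++20):

  #include <cstdint>

  // Signed add: overflow <=> (Result < LHS) != (RHS < 0).
  bool saddOverflows(int32_t LHS, int32_t RHS, int32_t &Result) {
    Result = (int32_t)((uint32_t)LHS + (uint32_t)RHS); // wrapping add
    return (Result < LHS) != (RHS < 0);
  }

  // Signed sub: overflow <=> (Result < LHS) != (RHS > 0).
  bool ssubOverflows(int32_t LHS, int32_t RHS, int32_t &Result) {
    Result = (int32_t)((uint32_t)LHS - (uint32_t)RHS); // wrapping sub
    return (Result < LHS) != (RHS > 0);
  }
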
 
 bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
@@ -6176,20 +7261,19 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
     // being a legal type for the architecture and thus has to be split to
     // two arguments.
     SDValue Ret;
+    TargetLowering::MakeLibCallOptions CallOptions;
+    CallOptions.setSExt(isSigned);
+    CallOptions.setIsPostTypeLegalization(true);
     if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
       // Halves of WideVT are packed into registers in different order
       // depending on platform endianness. This is usually handled by
       // the C calling convention, but we can't defer to it in
       // the legalizer.
       SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
-      Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
-                        /* doesNotReturn */ false, /* isReturnValueUsed */ true,
-                        /* isPostTypeLegalization */ true).first;
+      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
     } else {
       SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
-      Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
-                        /* doesNotReturn */ false, /* isReturnValueUsed */ true,
-                        /* isPostTypeLegalization */ true).first;
+      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
     }
 
     assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
            "Ret value is a collection of constituent nodes holding result.");
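
The two Args orderings mirror how the halves of a wide integer sit in memory. A hypothetical illustration (ours, not LLVM API) of why the halves swap with endianness:

  #include <cstdint>
  #include <cstring>

  // The in-memory order of a 64-bit value's two 32-bit halves follows the
  // platform's endianness: Halves[0] is the low half on little-endian targets
  // and the high half on big-endian ones, matching the two Args orders above.
  void splitArg(uint64_t V, uint32_t Halves[2]) {
    std::memcpy(Halves, &V, sizeof(V));
  }
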