Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
24 files changed, 3453 insertions, 1427 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index de909cc10795..f35f663d6ba1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -149,7 +149,7 @@ namespace { const TargetLowering &TLI; const SelectionDAGTargetInfo *STI; CombineLevel Level = BeforeLegalizeTypes; - CodeGenOpt::Level OptLevel; + CodeGenOptLevel OptLevel; bool LegalDAG = false; bool LegalOperations = false; bool LegalTypes = false; @@ -242,7 +242,7 @@ namespace { SDValue visit(SDNode *N); public: - DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL) + DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOptLevel OL) : DAG(D), TLI(D.getTargetLoweringInfo()), STI(D.getSubtarget().getSelectionDAGInfo()), OptLevel(OL), AA(AA) { ForCodeSize = DAG.shouldOptForSize(); @@ -430,6 +430,8 @@ namespace { SDValue visitSADDO_CARRY(SDNode *N); SDValue visitUADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N); + SDValue visitSADDO_CARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, + SDNode *N); SDValue visitSUBE(SDNode *N); SDValue visitUSUBO_CARRY(SDNode *N); SDValue visitSSUBO_CARRY(SDNode *N); @@ -493,6 +495,7 @@ namespace { SDValue visitFSUB(SDNode *N); SDValue visitFMUL(SDNode *N); template <class MatchContextClass> SDValue visitFMA(SDNode *N); + SDValue visitFMAD(SDNode *N); SDValue visitFDIV(SDNode *N); SDValue visitFREM(SDNode *N); SDValue visitFSQRT(SDNode *N); @@ -502,6 +505,7 @@ namespace { SDValue visitUINT_TO_FP(SDNode *N); SDValue visitFP_TO_SINT(SDNode *N); SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitXRINT(SDNode *N); SDValue visitFP_ROUND(SDNode *N); SDValue visitFP_EXTEND(SDNode *N); SDValue visitFNEG(SDNode *N); @@ -537,6 +541,8 @@ namespace { SDValue visitMSCATTER(SDNode *N); SDValue visitVPGATHER(SDNode *N); SDValue visitVPSCATTER(SDNode *N); + SDValue visitVP_STRIDED_LOAD(SDNode *N); + SDValue visitVP_STRIDED_STORE(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFP_TO_BF16(SDNode *N); @@ -561,7 +567,7 @@ namespace { SDValue N1, SDNodeFlags Flags); SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, SDValue N1, SDNodeFlags Flags); - SDValue reassociateReduction(unsigned ResOpc, unsigned Opc, const SDLoc &DL, + SDValue reassociateReduction(unsigned RedOpc, unsigned Opc, const SDLoc &DL, EVT VT, SDValue N0, SDValue N1, SDNodeFlags Flags = SDNodeFlags()); @@ -607,6 +613,7 @@ namespace { SDValue CombineExtLoad(SDNode *N); SDValue CombineZExtLogicopShiftLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); + SDValue combineFMulOrFDivWithIntPow2(SDNode *N); SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex); SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex); @@ -616,7 +623,10 @@ namespace { SDValue BuildUDIV(SDNode *N); SDValue BuildSREMPow2(SDNode *N); SDValue buildOptimizedSREM(SDValue N0, SDValue N1, SDNode *N); - SDValue BuildLogBase2(SDValue V, const SDLoc &DL); + SDValue BuildLogBase2(SDValue V, const SDLoc &DL, + bool KnownNeverZero = false, + bool InexpensiveOnly = false, + std::optional<EVT> OutVT = std::nullopt); SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags); SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags); SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags); @@ -698,6 +708,11 @@ namespace { case ISD::Constant: case ISD::ConstantFP: return StoreSource::Constant; 
+ case ISD::BUILD_VECTOR: + if (ISD::isBuildVectorOfConstantSDNodes(StoreVal.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(StoreVal.getNode())) + return StoreSource::Constant; + return StoreSource::Unknown; case ISD::EXTRACT_VECTOR_ELT: case ISD::EXTRACT_SUBVECTOR: return StoreSource::Extract; @@ -1329,6 +1344,30 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, return DAG.getNode(Opc, DL, VT, SDValue(NE, 0), N00); } } + + // Reassociate the operands from (OR/AND (OR/AND(N00, N01)), N1) to (OR/AND + // (OR/AND(N00, N1)), N01) when N00 and N1 are comparisons with the same + // predicate or to (OR/AND (OR/AND(N1, N01)), N00) when N01 and N1 are + // comparisons with the same predicate. This enables optimizations such as the + // following one: + // CMP(A,C)||CMP(B,C) => CMP(MIN/MAX(A,B), C) + // CMP(A,C)&&CMP(B,C) => CMP(MIN/MAX(A,B), C) + if (Opc == ISD::AND || Opc == ISD::OR) { + if (N1->getOpcode() == ISD::SETCC && N00->getOpcode() == ISD::SETCC && + N01->getOpcode() == ISD::SETCC) { + ISD::CondCode CC1 = cast<CondCodeSDNode>(N1.getOperand(2))->get(); + ISD::CondCode CC00 = cast<CondCodeSDNode>(N00.getOperand(2))->get(); + ISD::CondCode CC01 = cast<CondCodeSDNode>(N01.getOperand(2))->get(); + if (CC1 == CC00 && CC1 != CC01) { + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, Flags); + return DAG.getNode(Opc, DL, VT, OpNode, N01, Flags); + } + if (CC1 == CC01 && CC1 != CC00) { + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N01, N1, Flags); + return DAG.getNode(Opc, DL, VT, OpNode, N00, Flags); + } + } + } } return SDValue(); @@ -1873,6 +1912,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) { } SDValue DAGCombiner::visit(SDNode *N) { + // clang-format off switch (N->getOpcode()) { default: break; case ISD::TokenFactor: return visitTokenFactor(N); @@ -1963,6 +2003,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::FSUB: return visitFSUB(N); case ISD::FMUL: return visitFMUL(N); case ISD::FMA: return visitFMA<EmptyMatchContext>(N); + case ISD::FMAD: return visitFMAD(N); case ISD::FDIV: return visitFDIV(N); case ISD::FREM: return visitFREM(N); case ISD::FSQRT: return visitFSQRT(N); @@ -1972,6 +2013,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::LRINT: + case ISD::LLRINT: return visitXRINT(N); case ISD::FP_ROUND: return visitFP_ROUND(N); case ISD::FP_EXTEND: return visitFP_EXTEND(N); case ISD::FNEG: return visitFNEG(N); @@ -2026,6 +2069,7 @@ SDValue DAGCombiner::visit(SDNode *N) { #include "llvm/IR/VPIntrinsics.def" return visitVPOp(N); } + // clang-format on return SDValue(); } @@ -2124,7 +2168,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { } // Don't simplify token factors if optnone. - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOptLevel::None) return SDValue(); // Don't simplify the token factor if the node itself has too many operands.
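The reassociateOpsCommutative change above is what later lets foldAndOrOfSETCC turn a pair of comparisons into a single min/max plus one compare. A minimal standalone check of the scalar identity the rewrite depends on (an illustration only, not part of the patch; all names below are ours):

#include <algorithm>
#include <cassert>
#include <cstdint>

// CMP(A,C)||CMP(B,C) => CMP(MIN(A,B), C) for signed '<': the disjunction
// holds exactly when the smaller operand is below C. Dually, the conjunction
// A < C && B < C matches MAX(A,B) < C.
static bool orOfCmps(int32_t A, int32_t B, int32_t C) { return A < C || B < C; }
static bool minThenCmp(int32_t A, int32_t B, int32_t C) {
  return std::min(A, B) < C;
}

int main() {
  for (int32_t A = -4; A <= 4; ++A)
    for (int32_t B = -4; B <= 4; ++B)
      for (int32_t C = -4; C <= 4; ++C)
        assert(orOfCmps(A, B, C) == minThenCmp(A, B, C));
  return 0;
}

Note that the patch restricts the fold to non-equality predicates; `==`/`!=` do not decompose this way.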
@@ -2649,15 +2693,6 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) { return SDValue(); } -static bool isADDLike(SDValue V, const SelectionDAG &DAG) { - unsigned Opcode = V.getOpcode(); - if (Opcode == ISD::OR) - return DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)); - if (Opcode == ISD::XOR) - return isMinSignedConstant(V.getOperand(1)); - return false; -} - static bool areBitwiseNotOfEachother(SDValue Op0, SDValue Op1) { return (isBitwiseNot(Op0) && Op0.getOperand(0) == Op1) || @@ -2739,7 +2774,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // iff (or x, c0) is equivalent to (add x, c0). // Fold (add (xor x, c0), c1) -> (add x, (c0 + c1)) // iff (xor x, c0) is equivalent to (add x, c0). - if (isADDLike(N0, DAG)) { + if (DAG.isADDLike(N0)) { SDValue N01 = N0.getOperand(1); if (SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, {N1, N01})) return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add); @@ -2760,7 +2795,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // Do this optimization only when adding c does not introduce instructions // for adding carries. auto ReassociateAddOr = [&](SDValue N0, SDValue N1) { - if (isADDLike(N0, DAG) && N0.hasOneUse() && + if (DAG.isADDLike(N0) && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) { // If N0's type does not split or is a sign mask, it does not introduce // add carry. @@ -3011,7 +3046,7 @@ SDValue DAGCombiner::visitADDSAT(SDNode *N) { return N0; // If it cannot overflow, transform into an add. - if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never) + if (DAG.willNotOverflowAdd(IsSigned, N0, N1)) return DAG.getNode(ISD::ADD, DL, VT, N0, N1); return SDValue(); @@ -3281,11 +3316,16 @@ SDValue DAGCombiner::visitADDO(SDNode *N) { return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); // If it cannot overflow, transform into an add. - if (DAG.computeOverflowForAdd(IsSigned, N0, N1) == SelectionDAG::OFK_Never) + if (DAG.willNotOverflowAdd(IsSigned, N0, N1)) return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); - if (!IsSigned) { + if (IsSigned) { + // fold (saddo (xor a, -1), 1) -> (ssub 0, a). + if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) + return DAG.getNode(ISD::SSUBO, DL, N->getVTList(), + DAG.getConstant(0, DL, VT), N0.getOperand(0)); + } else { // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry. 
if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) { SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(), @@ -3617,6 +3657,18 @@ SDValue DAGCombiner::visitUADDO_CARRYLike(SDValue N0, SDValue N1, return SDValue(); } +SDValue DAGCombiner::visitSADDO_CARRYLike(SDValue N0, SDValue N1, + SDValue CarryIn, SDNode *N) { + // fold (saddo_carry (xor a, -1), b, c) -> (ssubo_carry b, a, !c) + if (isBitwiseNot(N0)) { + if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) + return DAG.getNode(ISD::SSUBO_CARRY, SDLoc(N), N->getVTList(), N1, + N0.getOperand(0), NotC); + } + + return SDValue(); +} + SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -3636,6 +3688,12 @@ SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0, N1); } + if (SDValue Combined = visitSADDO_CARRYLike(N0, N1, CarryIn, N)) + return Combined; + + if (SDValue Combined = visitSADDO_CARRYLike(N1, N0, CarryIn, N)) + return Combined; + return SDValue(); } @@ -4141,7 +4199,7 @@ SDValue DAGCombiner::visitSUBSAT(SDNode *N) { return N0; // If it cannot overflow, transform into an sub. - if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never) + if (DAG.willNotOverflowSub(IsSigned, N0, N1)) return DAG.getNode(ISD::SUB, DL, VT, N0, N1); return SDValue(); @@ -4207,7 +4265,7 @@ SDValue DAGCombiner::visitSUBO(SDNode *N) { return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT)); // If it cannot overflow, transform into an sub. - if (DAG.computeOverflowForSub(IsSigned, N0, N1) == SelectionDAG::OFK_Never) + if (DAG.willNotOverflowSub(IsSigned, N0, N1)) return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), DAG.getConstant(0, DL, CarryVT)); @@ -4342,12 +4400,12 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && - DAG.isKnownToBeAPowerOfTwo(N1) && (!VT.isVector() || Level <= AfterLegalizeVectorOps)) { - SDValue LogBase2 = BuildLogBase2(N1, DL); - EVT ShiftVT = getShiftAmountTy(N0.getValueType()); - SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); - return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); + if (SDValue LogBase2 = BuildLogBase2(N1, DL)) { + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc); + } } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c @@ -4869,31 +4927,31 @@ SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) { EVT VT = N->getValueType(0); // fold (udiv x, (1 << c)) -> x >>u c - if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && - DAG.isKnownToBeAPowerOfTwo(N1)) { - SDValue LogBase2 = BuildLogBase2(N1, DL); - AddToWorklist(LogBase2.getNode()); + if (isConstantOrConstantVector(N1, /*NoOpaques*/ true)) { + if (SDValue LogBase2 = BuildLogBase2(N1, DL)) { + AddToWorklist(LogBase2.getNode()); - EVT ShiftVT = getShiftAmountTy(N0.getValueType()); - SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); - AddToWorklist(Trunc.getNode()); - return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT); + AddToWorklist(Trunc.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + } } // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { SDValue N10 = N1.getOperand(0); - if (isConstantOrConstantVector(N10, 
/*NoOpaques*/ true) && - DAG.isKnownToBeAPowerOfTwo(N10)) { - SDValue LogBase2 = BuildLogBase2(N10, DL); - AddToWorklist(LogBase2.getNode()); + if (isConstantOrConstantVector(N10, /*NoOpaques*/ true)) { + if (SDValue LogBase2 = BuildLogBase2(N10, DL)) { + AddToWorklist(LogBase2.getNode()); - EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); - AddToWorklist(Trunc.getNode()); - SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); - AddToWorklist(Add.getNode()); - return DAG.getNode(ISD::SRL, DL, VT, N0, Add); + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT); + AddToWorklist(Trunc.getNode()); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); + } } } @@ -5111,14 +5169,15 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c) if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && - DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) { - unsigned NumEltBits = VT.getScalarSizeInBits(); - SDValue LogBase2 = BuildLogBase2(N1, DL); - SDValue SRLAmt = DAG.getNode( - ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2); - EVT ShiftVT = getShiftAmountTy(N0.getValueType()); - SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT); - return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + hasOperation(ISD::SRL, VT)) { + if (SDValue LogBase2 = BuildLogBase2(N1, DL)) { + unsigned NumEltBits = VT.getScalarSizeInBits(); + SDValue SRLAmt = DAG.getNode( + ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2); + EVT ShiftVT = getShiftAmountTy(N0.getValueType()); + SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT); + return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc); + } } // If the type twice as wide is legal, transform the mulhu to a wider multiply @@ -5292,6 +5351,10 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // Constant fold. + if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) + return DAG.getNode(ISD::SMUL_LOHI, DL, N->getVTList(), N0, N1); + // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) @@ -5330,6 +5393,10 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + // Constant fold. + if (isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) + return DAG.getNode(ISD::UMUL_LOHI, DL, N->getVTList(), N0, N1); + // canonicalize constant to RHS (vector doesn't have to splat) if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) @@ -5412,34 +5479,18 @@ SDValue DAGCombiner::visitMULO(SDNode *N) { return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL, N->getVTList(), N0, N0); - if (IsSigned) { - // A 1 bit SMULO overflows if both inputs are 1. - if (VT.getScalarSizeInBits() == 1) { - SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1); - return CombineTo(N, And, - DAG.getSetCC(DL, CarryVT, And, - DAG.getConstant(0, DL, VT), ISD::SETNE)); - } - - // Multiplying n * m significant bits yields a result of n + m significant - // bits. If the total number of significant bits does not exceed the - // result bit width (minus 1), there is no overflow. 
- unsigned SignBits = DAG.ComputeNumSignBits(N0); - if (SignBits > 1) - SignBits += DAG.ComputeNumSignBits(N1); - if (SignBits > VT.getScalarSizeInBits() + 1) - return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), - DAG.getConstant(0, DL, CarryVT)); - } else { - KnownBits N1Known = DAG.computeKnownBits(N1); - KnownBits N0Known = DAG.computeKnownBits(N0); - bool Overflow; - (void)N0Known.getMaxValue().umul_ov(N1Known.getMaxValue(), Overflow); - if (!Overflow) - return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), - DAG.getConstant(0, DL, CarryVT)); + // A 1 bit SMULO overflows if both inputs are 1. + if (IsSigned && VT.getScalarSizeInBits() == 1) { + SDValue And = DAG.getNode(ISD::AND, DL, VT, N0, N1); + SDValue Cmp = DAG.getSetCC(DL, CarryVT, And, + DAG.getConstant(0, DL, VT), ISD::SETNE); + return CombineTo(N, And, Cmp); } + // If it cannot overflow, transform into a mul. + if (DAG.willNotOverflowMul(IsSigned, N0, N1)) + return CombineTo(N, DAG.getNode(ISD::MUL, DL, VT, N0, N1), + DAG.getConstant(0, DL, CarryVT)); return SDValue(); } @@ -5459,12 +5510,12 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2, if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) return 0; // The constants need to be the same or a truncated version of each other. - ConstantSDNode *N1C = isConstOrConstSplat(N1); - ConstantSDNode *N3C = isConstOrConstSplat(N3); + ConstantSDNode *N1C = isConstOrConstSplat(peekThroughTruncates(N1)); + ConstantSDNode *N3C = isConstOrConstSplat(peekThroughTruncates(N3)); if (!N1C || !N3C) return 0; - const APInt &C1 = N1C->getAPIntValue(); - const APInt &C2 = N3C->getAPIntValue(); + const APInt &C1 = N1C->getAPIntValue().trunc(N1.getScalarValueSizeInBits()); + const APInt &C2 = N3C->getAPIntValue().trunc(N3.getScalarValueSizeInBits()); if (C1.getBitWidth() < C2.getBitWidth() || C1 != C2.sext(C1.getBitWidth())) return 0; return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0); @@ -5579,7 +5630,7 @@ static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2, SelectionDAG &DAG) { // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may - // be truncated versions of the the setcc (N0/N1). + // be truncated versions of the setcc (N0/N1). if ((N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) || N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT) @@ -6013,6 +6064,72 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, return SDValue(); } +static bool arebothOperandsNotSNan(SDValue Operand1, SDValue Operand2, + SelectionDAG &DAG) { + return DAG.isKnownNeverSNaN(Operand2) && DAG.isKnownNeverSNaN(Operand1); +} + +static bool arebothOperandsNotNan(SDValue Operand1, SDValue Operand2, + SelectionDAG &DAG) { + return DAG.isKnownNeverNaN(Operand2) && DAG.isKnownNeverNaN(Operand1); +} + +static unsigned getMinMaxOpcodeForFP(SDValue Operand1, SDValue Operand2, + ISD::CondCode CC, unsigned OrAndOpcode, + SelectionDAG &DAG, + bool isFMAXNUMFMINNUM_IEEE, + bool isFMAXNUMFMINNUM) { + // The optimization cannot be applied for all the predicates because + // of the way FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle + // NaNs. For FMINNUM_IEEE/FMAXNUM_IEEE, the optimization cannot be + // applied at all if one of the operands is a signaling NaN. + + // It is safe to use FMINNUM_IEEE/FMAXNUM_IEEE if all the operands + // are non NaN values. 
+ if (((CC == ISD::SETLT || CC == ISD::SETLE) && (OrAndOpcode == ISD::OR)) || + ((CC == ISD::SETGT || CC == ISD::SETGE) && (OrAndOpcode == ISD::AND))) + return arebothOperandsNotNan(Operand1, Operand2, DAG) && + isFMAXNUMFMINNUM_IEEE + ? ISD::FMINNUM_IEEE + : ISD::DELETED_NODE; + else if (((CC == ISD::SETGT || CC == ISD::SETGE) && + (OrAndOpcode == ISD::OR)) || + ((CC == ISD::SETLT || CC == ISD::SETLE) && + (OrAndOpcode == ISD::AND))) + return arebothOperandsNotNan(Operand1, Operand2, DAG) && + isFMAXNUMFMINNUM_IEEE + ? ISD::FMAXNUM_IEEE + : ISD::DELETED_NODE; + // Both FMINNUM/FMAXNUM and FMINNUM_IEEE/FMAXNUM_IEEE handle quiet + // NaNs in the same way. But, FMINNUM/FMAXNUM and FMINNUM_IEEE/ + // FMAXNUM_IEEE handle signaling NaNs differently. If we cannot prove + // that there are not any sNaNs, then the optimization is not valid + // for FMINNUM_IEEE/FMAXNUM_IEEE. In the presence of sNaNs, we apply + // the optimization using FMINNUM/FMAXNUM for the following cases. If + // we can prove that we do not have any sNaNs, then we can do the + // optimization using FMINNUM_IEEE/FMAXNUM_IEEE for the following + // cases. + else if (((CC == ISD::SETOLT || CC == ISD::SETOLE) && + (OrAndOpcode == ISD::OR)) || + ((CC == ISD::SETUGT || CC == ISD::SETUGE) && + (OrAndOpcode == ISD::AND))) + return isFMAXNUMFMINNUM ? ISD::FMINNUM + : arebothOperandsNotSNan(Operand1, Operand2, DAG) && + isFMAXNUMFMINNUM_IEEE + ? ISD::FMINNUM_IEEE + : ISD::DELETED_NODE; + else if (((CC == ISD::SETOGT || CC == ISD::SETOGE) && + (OrAndOpcode == ISD::OR)) || + ((CC == ISD::SETULT || CC == ISD::SETULE) && + (OrAndOpcode == ISD::AND))) + return isFMAXNUMFMINNUM ? ISD::FMAXNUM + : arebothOperandsNotSNan(Operand1, Operand2, DAG) && + isFMAXNUMFMINNUM_IEEE + ? ISD::FMAXNUM_IEEE + : ISD::DELETED_NODE; + return ISD::DELETED_NODE; +} + static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) { using AndOrSETCCFoldKind = TargetLowering::AndOrSETCCFoldKind; assert( @@ -6022,7 +6139,8 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) { // TODO: Search past casts/truncates. SDValue LHS = LogicOp->getOperand(0); SDValue RHS = LogicOp->getOperand(1); - if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC) + if (LHS->getOpcode() != ISD::SETCC || RHS->getOpcode() != ISD::SETCC || + !LHS->hasOneUse() || !RHS->hasOneUse()) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -6050,59 +6168,77 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) { // (LHS0 < LHS1) | (RHS0 < RHS1) -> min(LHS0, RHS0) < LHS1 // and and-cmp-cmp will be replaced with max-cmp sequence: // (LHS0 < LHS1) & (RHS0 < RHS1) -> max(LHS0, RHS0) < LHS1 - if (OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) && - TLI.isOperationLegal(ISD::SMAX, OpVT) && - TLI.isOperationLegal(ISD::UMIN, OpVT) && - TLI.isOperationLegal(ISD::SMIN, OpVT)) { - if (LHS->getOpcode() == ISD::SETCC && RHS->getOpcode() == ISD::SETCC && - LHS->hasOneUse() && RHS->hasOneUse() && - // The two comparisons should have either the same predicate or the - // predicate of one of the comparisons is the opposite of the other one. - (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR)) && - // The optimization does not work for `==` or `!=` . 
- !ISD::isIntEqualitySetCC(CCL) && !ISD::isIntEqualitySetCC(CCR)) { - SDValue CommonValue, Operand1, Operand2; - ISD::CondCode CC = ISD::SETCC_INVALID; - if (CCL == CCR) { - if (LHS0 == RHS0) { - CommonValue = LHS0; - Operand1 = LHS1; - Operand2 = RHS1; - CC = ISD::getSetCCSwappedOperands(CCL); - } else if (LHS1 == RHS1) { - CommonValue = LHS1; - Operand1 = LHS0; - Operand2 = RHS0; - CC = CCL; - } - } else { - assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC"); - if (LHS0 == RHS1) { - CommonValue = LHS0; - Operand1 = LHS1; - Operand2 = RHS0; - CC = ISD::getSetCCSwappedOperands(CCL); - } else if (RHS0 == LHS1) { - CommonValue = LHS1; - Operand1 = LHS0; - Operand2 = RHS1; - CC = CCL; - } + // The optimization does not work for `==` or `!=` . + // The two comparisons should have either the same predicate or the + // predicate of one of the comparisons is the opposite of the other one. + bool isFMAXNUMFMINNUM_IEEE = TLI.isOperationLegal(ISD::FMAXNUM_IEEE, OpVT) && + TLI.isOperationLegal(ISD::FMINNUM_IEEE, OpVT); + bool isFMAXNUMFMINNUM = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, OpVT) && + TLI.isOperationLegalOrCustom(ISD::FMINNUM, OpVT); + if (((OpVT.isInteger() && TLI.isOperationLegal(ISD::UMAX, OpVT) && + TLI.isOperationLegal(ISD::SMAX, OpVT) && + TLI.isOperationLegal(ISD::UMIN, OpVT) && + TLI.isOperationLegal(ISD::SMIN, OpVT)) || + (OpVT.isFloatingPoint() && + (isFMAXNUMFMINNUM_IEEE || isFMAXNUMFMINNUM))) && + !ISD::isIntEqualitySetCC(CCL) && !ISD::isFPEqualitySetCC(CCL) && + CCL != ISD::SETFALSE && CCL != ISD::SETO && CCL != ISD::SETUO && + CCL != ISD::SETTRUE && + (CCL == CCR || CCL == ISD::getSetCCSwappedOperands(CCR))) { + + SDValue CommonValue, Operand1, Operand2; + ISD::CondCode CC = ISD::SETCC_INVALID; + if (CCL == CCR) { + if (LHS0 == RHS0) { + CommonValue = LHS0; + Operand1 = LHS1; + Operand2 = RHS1; + CC = ISD::getSetCCSwappedOperands(CCL); + } else if (LHS1 == RHS1) { + CommonValue = LHS1; + Operand1 = LHS0; + Operand2 = RHS0; + CC = CCL; } + } else { + assert(CCL == ISD::getSetCCSwappedOperands(CCR) && "Unexpected CC"); + if (LHS0 == RHS1) { + CommonValue = LHS0; + Operand1 = LHS1; + Operand2 = RHS0; + CC = CCR; + } else if (RHS0 == LHS1) { + CommonValue = LHS1; + Operand1 = LHS0; + Operand2 = RHS1; + CC = CCL; + } + } - if (CC != ISD::SETCC_INVALID) { - unsigned NewOpcode; - bool IsSigned = isSignedIntSetCC(CC); - if (((CC == ISD::SETLE || CC == ISD::SETULE || CC == ISD::SETLT || - CC == ISD::SETULT) && - (LogicOp->getOpcode() == ISD::OR)) || - ((CC == ISD::SETGE || CC == ISD::SETUGE || CC == ISD::SETGT || - CC == ISD::SETUGT) && - (LogicOp->getOpcode() == ISD::AND))) + // Don't do this transform for sign bit tests. Let foldLogicOfSetCCs + // handle it using OR/AND. + if (CC == ISD::SETLT && isNullOrNullSplat(CommonValue)) + CC = ISD::SETCC_INVALID; + else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CommonValue)) + CC = ISD::SETCC_INVALID; + + if (CC != ISD::SETCC_INVALID) { + unsigned NewOpcode = ISD::DELETED_NODE; + bool IsSigned = isSignedIntSetCC(CC); + if (OpVT.isInteger()) { + bool IsLess = (CC == ISD::SETLE || CC == ISD::SETULE || + CC == ISD::SETLT || CC == ISD::SETULT); + bool IsOr = (LogicOp->getOpcode() == ISD::OR); + if (IsLess == IsOr) NewOpcode = IsSigned ? ISD::SMIN : ISD::UMIN; else NewOpcode = IsSigned ? 
ISD::SMAX : ISD::UMAX; + } else if (OpVT.isFloatingPoint()) + NewOpcode = + getMinMaxOpcodeForFP(Operand1, Operand2, CC, LogicOp->getOpcode(), + DAG, isFMAXNUMFMINNUM_IEEE, isFMAXNUMFMINNUM); + if (NewOpcode != ISD::DELETED_NODE) { SDValue MinMaxValue = DAG.getNode(NewOpcode, DL, OpVT, Operand1, Operand2); return DAG.getSetCC(DL, VT, MinMaxValue, CommonValue, CC); @@ -6115,8 +6251,7 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) { if (CCL == CCR && CCL == (LogicOp->getOpcode() == ISD::AND ? ISD::SETNE : ISD::SETEQ) && - LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger() && LHS.hasOneUse() && - RHS.hasOneUse()) { + LHS0 == RHS0 && LHS1C && RHS1C && OpVT.isInteger()) { const APInt &APLhs = LHS1C->getAPIntValue(); const APInt &APRhs = RHS1C->getAPIntValue(); @@ -6179,6 +6314,33 @@ static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) { return SDValue(); } +// Combine `(select c, (X & 1), 0)` -> `(and (zext c), X)`. +// We canonicalize to the `select` form in the middle end, but the `and` form +// gets better codegen and all tested targets (arm, x86, riscv) +static SDValue combineSelectAsExtAnd(SDValue Cond, SDValue T, SDValue F, + const SDLoc &DL, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!isNullConstant(F)) + return SDValue(); + + EVT CondVT = Cond.getValueType(); + if (TLI.getBooleanContents(CondVT) != + TargetLoweringBase::ZeroOrOneBooleanContent) + return SDValue(); + + if (T.getOpcode() != ISD::AND) + return SDValue(); + + if (!isOneConstant(T.getOperand(1))) + return SDValue(); + + EVT OpVT = T.getValueType(); + + SDValue CondMask = + OpVT == CondVT ? Cond : DAG.getBoolExtOrTrunc(Cond, DL, OpVT, CondVT); + return DAG.getNode(ISD::AND, DL, OpVT, CondMask, T.getOperand(0)); +} + /// This contains all DAGCombine rules which reduce two values combined by /// an And operation to a single value. This makes them reusable in the context /// of visitSELECT(). Rules involving constants are not included as @@ -6464,7 +6626,7 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { SmallPtrSet<SDNode*, 2> NodesWithConsts; SDNode *FixupNode = nullptr; if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) { - if (Loads.size() == 0) + if (Loads.empty()) return false; LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump()); @@ -6488,12 +6650,17 @@ bool DAGCombiner::BackwardsPropagateMask(SDNode *N) { SDValue Op1 = LogicN->getOperand(1); if (isa<ConstantSDNode>(Op0)) - std::swap(Op0, Op1); + Op0 = + DAG.getNode(ISD::AND, SDLoc(Op0), Op0.getValueType(), Op0, MaskOp); + + if (isa<ConstantSDNode>(Op1)) + Op1 = + DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), Op1, MaskOp); - SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(), - Op1, MaskOp); + if (isa<ConstantSDNode>(Op0) && !isa<ConstantSDNode>(Op1)) + std::swap(Op0, Op1); - DAG.UpdateNodeOperands(LogicN, Op0, And); + DAG.UpdateNodeOperands(LogicN, Op0, Op1); } // Create narrow loads. @@ -6924,12 +7091,23 @@ SDValue DAGCombiner::visitAND(SDNode *N) { N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { + unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits(); APInt SplatValue, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; - bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, - SplatBitSize, HasAnyUndefs); - if (IsSplat) { + // Endianness should not matter here. 
Code below makes sure that we only + // use the result if the SplatBitSize is a multiple of the vector element + // size. And after that we AND all element sized parts of the splat + // together. So the end result should be the same regardless of in which + // order we do those operations. + const bool IsBigEndian = false; + bool IsSplat = + Vector->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, EltBitWidth, IsBigEndian); + + // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a + // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. + if (IsSplat && (SplatBitSize % EltBitWidth) == 0) { // Undef bits can contribute to a possible optimisation if set, so // set them. SplatValue |= SplatUndef; @@ -6938,23 +7116,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // the first vector value and FF for the rest, repeating. We need a mask // that will apply equally to all members of the vector, so AND all the // lanes of the constant together. - unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits(); - - // If the splat value has been compressed to a bitlength lower - // than the size of the vector lane, we need to re-expand it to - // the lane size. - if (EltBitWidth > SplatBitSize) - for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth); - SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2) - SplatValue |= SplatValue.shl(SplatBitSize); - - // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a - // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. - if ((SplatBitSize % EltBitWidth) == 0) { - Constant = APInt::getAllOnes(EltBitWidth); - for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) - Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); - } + Constant = APInt::getAllOnes(EltBitWidth); + for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) + Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); } } @@ -7467,12 +7631,12 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N0, N1, VT, getShiftAmountTy(VT))) - return BSwap; + return BSwap; // Try again with commuted operands. if (SDValue BSwap = matchBSwapHWordOrAndAnd(TLI, DAG, N, N1, N0, VT, getShiftAmountTy(VT))) - return BSwap; + return BSwap; // Look for either @@ -8493,7 +8657,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { /// *ExtractVectorElement using SDByteProvider = ByteProvider<SDNode *>; -static const std::optional<SDByteProvider> +static std::optional<SDByteProvider> calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, std::optional<uint64_t> VectorIndex, unsigned StartingIndex = 0) { @@ -8701,7 +8865,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // TODO: If there is evidence that running this later would help, this // limitation could be removed. Legality checks may need to be added // for the created store and optional bswap/rotate. - if (LegalOperations || OptLevel == CodeGenOpt::None) + if (LegalOperations || OptLevel == CodeGenOptLevel::None) return SDValue(); // We only handle merging simple stores of 1-4 bytes. 
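The reworked splat handling in visitAND above hinges on folding every element-sized chunk of the splat constant into one per-lane mask. A standalone sketch of that lane-AND step using plain integers in place of APInt (illustrative only; the helper name is ours):

#include <cassert>
#include <cstdint>

// Mirror of the SplatValue.extractBits loop in visitAND: AND all 16-bit
// chunks of a 64-bit splat pattern to get a mask that is correct for every
// 16-bit vector lane.
static uint16_t foldSplatToLaneMask(uint64_t Splat) {
  uint16_t Mask = 0xFFFF; // Analogue of APInt::getAllOnes(EltBitWidth).
  for (unsigned I = 0; I < 64; I += 16)
    Mask &= static_cast<uint16_t>(Splat >> I);
  return Mask;
}

int main() {
  // Uniform 16-bit lanes: the mask is just the lane value.
  assert(foldSplatToLaneMask(0x00FF00FF00FF00FFULL) == 0x00FF);
  // Mixed chunks AND down to the bits common to all lanes, the only mask
  // that applies equally to every vector element.
  assert(foldSplatToLaneMask(0x0F0F00FF0F0F00FFULL) == 0x000F);
  return 0;
}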
@@ -9710,9 +9874,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); } - if (SimplifyDemandedBits(SDValue(N, 0))) - return SDValue(N, 0); - // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) if (N0.getOpcode() == ISD::SHL) { auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS, @@ -9886,15 +10047,35 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // Variant of version done on multiply, except mul by a power of 2 is turned // into a shift. if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) && - N0->hasOneUse() && - isConstantOrConstantVector(N1, /* No Opaques */ true) && - isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) && + N0->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) { + SDValue N01 = N0.getOperand(1); + if (SDValue Shl1 = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, {N01, N1})) { + SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); + AddToWorklist(Shl0.getNode()); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); + } + } + + // fold (shl (sext (add_nsw x, c1)), c2) -> (add (shl (sext x), c2), c1 << c2) + // TODO: Add zext/add_nuw variant with suitable test coverage + // TODO: Should we limit this with isLegalAddImmediate? + if (N0.getOpcode() == ISD::SIGN_EXTEND && + N0.getOperand(0).getOpcode() == ISD::ADD && + N0.getOperand(0)->getFlags().hasNoSignedWrap() && N0->hasOneUse() && + N0.getOperand(0)->hasOneUse() && TLI.isDesirableToCommuteWithShift(N, Level)) { - SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); - SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); - AddToWorklist(Shl0.getNode()); - AddToWorklist(Shl1.getNode()); - return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1); + SDValue Add = N0.getOperand(0); + SDLoc DL(N0); + if (SDValue ExtC = DAG.FoldConstantArithmetic(N0.getOpcode(), DL, VT, + {Add.getOperand(1)})) { + if (SDValue ShlC = + DAG.FoldConstantArithmetic(ISD::SHL, DL, VT, {ExtC, N1})) { + SDValue ExtX = DAG.getNode(N0.getOpcode(), DL, VT, Add.getOperand(0)); + SDValue ShlX = DAG.getNode(ISD::SHL, DL, VT, ExtX, N1); + return DAG.getNode(ISD::ADD, DL, VT, ShlX, ShlC); + } + } } // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) @@ -9910,6 +10091,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (SDValue NewSHL = visitShiftByConstant(N)) return NewSHL; + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)). if (N0.getOpcode() == ISD::VSCALE && N1C) { const APInt &C0 = N0.getConstantOperandAPInt(0); @@ -10110,25 +10294,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports - // sext_inreg. 
ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { - unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); - EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); - if (VT.isVector()) - ExtVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, - VT.getVectorElementCount()); - if (!LegalOperations || - TLI.getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == - TargetLowering::Legal) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, - N0.getOperand(0), DAG.getValueType(ExtVT)); - // Even if we can't convert to sext_inreg, we might be able to remove - // this shift pair if the input is already sign extended. - if (DAG.ComputeNumSignBits(N0.getOperand(0)) > N1C->getZExtValue()) - return N0.getOperand(0); - } // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) // clamp (add c1, c2) to max shift. @@ -10169,7 +10335,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // If truncate is free for the target sext(shl) is likely to result in better // code. if (N0.getOpcode() == ISD::SHL && N1C) { - // Get the two constanst of the shifts, CN0 = m, CN = n. + // Get the two constants of the shifts, CN0 = m, CN = n. const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); if (N01C) { LLVMContext &Ctx = *DAG.getContext(); @@ -10640,7 +10806,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { RHS->getMemOperand()->getFlags(), &Fast) && Fast) { SDValue NewPtr = DAG.getMemBasePlusOffset( - RHS->getBasePtr(), TypeSize::Fixed(PtrOff), DL); + RHS->getBasePtr(), TypeSize::getFixed(PtrOff), DL); AddToWorklist(NewPtr.getNode()); SDValue Load = DAG.getLoad( VT, DL, RHS->getChain(), NewPtr, @@ -10739,9 +10905,12 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) { Op1 = AbsOp1.getOperand(1); unsigned Opc0 = Op0.getOpcode(); + // Check if the operands of the sub are (zero|sign)-extended. + // TODO: Should we use ValueTracking instead? if (Opc0 != Op1.getOpcode() || - (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) { + (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND && + Opc0 != ISD::SIGN_EXTEND_INREG)) { // fold (abs (sub nsw x, y)) -> abds(x, y) if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) && TLI.preferABDSToABSWithNSW(VT)) { @@ -10751,17 +10920,24 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N) { return SDValue(); } - EVT VT1 = Op0.getOperand(0).getValueType(); - EVT VT2 = Op1.getOperand(0).getValueType(); - unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; + EVT VT0, VT1; + if (Opc0 == ISD::SIGN_EXTEND_INREG) { + VT0 = cast<VTSDNode>(Op0.getOperand(1))->getVT(); + VT1 = cast<VTSDNode>(Op1.getOperand(1))->getVT(); + } else { + VT0 = Op0.getOperand(0).getValueType(); + VT1 = Op1.getOperand(0).getValueType(); + } + unsigned ABDOpcode = (Opc0 == ISD::ZERO_EXTEND) ? ISD::ABDU : ISD::ABDS; // fold abs(sext(x) - sext(y)) -> zext(abds(x, y)) // fold abs(zext(x) - zext(y)) -> zext(abdu(x, y)) - // NOTE: Extensions must be equivalent. - if (VT1 == VT2 && hasOperation(ABDOpcode, VT1)) { - Op0 = Op0.getOperand(0); - Op1 = Op1.getOperand(0); - SDValue ABD = DAG.getNode(ABDOpcode, DL, VT1, Op0, Op1); + EVT MaxVT = VT0.bitsGT(VT1) ? 
VT0 : VT1; + if ((VT0 == MaxVT || Op0->hasOneUse()) && + (VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) { + SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT, + DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0), + DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1)); ABD = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, ABD); return DAG.getZExtOrTrunc(ABD, DL, SrcVT); } @@ -11487,6 +11663,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SDValue BinOp = foldSelectOfBinops(N)) return BinOp; + if (SDValue R = combineSelectAsExtAnd(N0, N1, N2, DL, DAG)) + return R; + return SDValue(); } @@ -11547,8 +11726,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, SelectionDAG &DAG, const SDLoc &DL) { - if (Index.getOpcode() != ISD::ADD) - return false; // Only perform the transformation when existing operands can be reused. if (IndexIsScaled) @@ -11558,21 +11735,27 @@ bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, return false; EVT VT = BasePtr.getValueType(); + + if (SDValue SplatVal = DAG.getSplatValue(Index); + SplatVal && !isNullConstant(SplatVal) && + SplatVal.getValueType() == VT) { + BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); + Index = DAG.getSplat(Index.getValueType(), DL, DAG.getConstant(0, DL, VT)); + return true; + } + + if (Index.getOpcode() != ISD::ADD) + return false; + if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0)); SplatVal && SplatVal.getValueType() == VT) { - if (isNullConstant(BasePtr)) - BasePtr = SplatVal; - else - BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); + BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); Index = Index.getOperand(1); return true; } if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1)); SplatVal && SplatVal.getValueType() == VT) { - if (isNullConstant(BasePtr)) - BasePtr = SplatVal; - else - BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); + BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); Index = Index.getOperand(0); return true; } @@ -11586,10 +11769,9 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, // It's always safe to look through zero extends. if (Index.getOpcode() == ISD::ZERO_EXTEND) { - SDValue Op = Index.getOperand(0); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { + if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) { IndexType = ISD::UNSIGNED_SCALED; - Index = Op; + Index = Index.getOperand(0); return true; } if (ISD::isIndexTypeSigned(IndexType)) { @@ -11600,12 +11782,10 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, // It's only safe to look through sign extends when Index is signed. if (Index.getOpcode() == ISD::SIGN_EXTEND && - ISD::isIndexTypeSigned(IndexType)) { - SDValue Op = Index.getOperand(0); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { - Index = Op; - return true; - } + ISD::isIndexTypeSigned(IndexType) && + TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) { + Index = Index.getOperand(0); + return true; } return false; @@ -11756,6 +11936,21 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVP_STRIDED_STORE(SDNode *N) { + auto *SST = cast<VPStridedStoreSDNode>(N); + EVT EltVT = SST->getValue().getValueType().getVectorElementType(); + // Combine strided stores with unit-stride to a regular VP store. 
+ if (auto *CStride = dyn_cast<ConstantSDNode>(SST->getStride()); + CStride && CStride->getZExtValue() == EltVT.getStoreSize()) { + return DAG.getStoreVP(SST->getChain(), SDLoc(N), SST->getValue(), + SST->getBasePtr(), SST->getOffset(), SST->getMask(), + SST->getVectorLength(), SST->getMemoryVT(), + SST->getMemOperand(), SST->getAddressingMode(), + SST->isTruncatingStore(), SST->isCompressingStore()); + } + return SDValue(); +} + SDValue DAGCombiner::visitVPGATHER(SDNode *N) { VPGatherSDNode *MGT = cast<VPGatherSDNode>(N); SDValue Mask = MGT->getMask(); @@ -11843,6 +12038,22 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVP_STRIDED_LOAD(SDNode *N) { + auto *SLD = cast<VPStridedLoadSDNode>(N); + EVT EltVT = SLD->getValueType(0).getVectorElementType(); + // Combine strided loads with unit-stride to a regular VP load. + if (auto *CStride = dyn_cast<ConstantSDNode>(SLD->getStride()); + CStride && CStride->getZExtValue() == EltVT.getStoreSize()) { + SDValue NewLd = DAG.getLoadVP( + SLD->getAddressingMode(), SLD->getExtensionType(), SLD->getValueType(0), + SDLoc(N), SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), + SLD->getMask(), SLD->getVectorLength(), SLD->getMemoryVT(), + SLD->getMemOperand(), SLD->isExpandingLoad()); + return CombineTo(N, NewLd, NewLd.getValue(1)); + } + return SDValue(); +} + /// A vector select of 2 constant vectors can be simplified to math/logic to /// avoid a variable select instruction and possibly avoid constant loads. SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { @@ -12255,27 +12466,132 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get(); EVT VT = N->getValueType(0); + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); - SDValue Combined = SimplifySetCC(VT, N->getOperand(0), N->getOperand(1), Cond, - SDLoc(N), !PreferSetCC); + SDValue Combined = SimplifySetCC(VT, N0, N1, Cond, SDLoc(N), !PreferSetCC); - if (!Combined) - return SDValue(); + if (Combined) { + // If we prefer to have a setcc, and we don't, we'll try our best to + // recreate one using rebuildSetCC. + if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { + SDValue NewSetCC = rebuildSetCC(Combined); - // If we prefer to have a setcc, and we don't, we'll try our best to - // recreate one using rebuildSetCC. - if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) { - SDValue NewSetCC = rebuildSetCC(Combined); - - // We don't have anything interesting to combine to. - if (NewSetCC.getNode() == N) - return SDValue(); + // We don't have anything interesting to combine to. + if (NewSetCC.getNode() == N) + return SDValue(); - if (NewSetCC) - return NewSetCC; + if (NewSetCC) + return NewSetCC; + } + return Combined; } - return Combined; + // Optimize + // 1) (icmp eq/ne (and X, C0), (shift X, C1)) + // or + // 2) (icmp eq/ne X, (rotate X, C1)) + // If C0 is a mask or shifted mask and the shift amt (C1) isolates the + // remaining bits (i.e. something like `(x64 & UINT32_MAX) == (x64 >> 32)`) + // Then: + // If C1 is a power of 2, then the rotate and shift+and versions are + // equivalent, so we can interchange them depending on target preference. + // Otherwise, if we have the shift+and version we can interchange srl/shl + // which in turn affects the constant C0. We can use this to get better + // constants again determined by target preference.
+ if (Cond == ISD::SETNE || Cond == ISD::SETEQ) { + auto IsAndWithShift = [](SDValue A, SDValue B) { + return A.getOpcode() == ISD::AND && + (B.getOpcode() == ISD::SRL || B.getOpcode() == ISD::SHL) && + A.getOperand(0) == B.getOperand(0); + }; + auto IsRotateWithOp = [](SDValue A, SDValue B) { + return (B.getOpcode() == ISD::ROTL || B.getOpcode() == ISD::ROTR) && + B.getOperand(0) == A; + }; + SDValue AndOrOp = SDValue(), ShiftOrRotate = SDValue(); + bool IsRotate = false; + + // Find either shift+and or rotate pattern. + if (IsAndWithShift(N0, N1)) { + AndOrOp = N0; + ShiftOrRotate = N1; + } else if (IsAndWithShift(N1, N0)) { + AndOrOp = N1; + ShiftOrRotate = N0; + } else if (IsRotateWithOp(N0, N1)) { + IsRotate = true; + AndOrOp = N0; + ShiftOrRotate = N1; + } else if (IsRotateWithOp(N1, N0)) { + IsRotate = true; + AndOrOp = N1; + ShiftOrRotate = N0; + } + + if (AndOrOp && ShiftOrRotate && ShiftOrRotate.hasOneUse() && + (IsRotate || AndOrOp.hasOneUse())) { + EVT OpVT = N0.getValueType(); + // Get constant shift/rotate amount and possibly mask (if it's the + // shift+and variant). + auto GetAPIntValue = [](SDValue Op) -> std::optional<APInt> { + ConstantSDNode *CNode = isConstOrConstSplat(Op, /*AllowUndefs*/ false, + /*AllowTrunc*/ false); + if (CNode == nullptr) + return std::nullopt; + return CNode->getAPIntValue(); + }; + std::optional<APInt> AndCMask = + IsRotate ? std::nullopt : GetAPIntValue(AndOrOp.getOperand(1)); + std::optional<APInt> ShiftCAmt = + GetAPIntValue(ShiftOrRotate.getOperand(1)); + unsigned NumBits = OpVT.getScalarSizeInBits(); + + // We found constants. + if (ShiftCAmt && (IsRotate || AndCMask) && ShiftCAmt->ult(NumBits)) { + unsigned ShiftOpc = ShiftOrRotate.getOpcode(); + // Check that the constants meet the constraints. + bool CanTransform = IsRotate; + if (!CanTransform) { + // Check that mask and shift complement each other + CanTransform = *ShiftCAmt == (~*AndCMask).popcount(); + // Check that we are comparing all bits + CanTransform &= (*ShiftCAmt + AndCMask->popcount()) == NumBits; + // Check that the and mask is correct for the shift + CanTransform &= + ShiftOpc == ISD::SHL ? (~*AndCMask).isMask() : AndCMask->isMask(); + } + + // See if target prefers another shift/rotate opcode. + unsigned NewShiftOpc = TLI.preferedOpcodeForCmpEqPiecesOfOperand( + OpVT, ShiftOpc, ShiftCAmt->isPowerOf2(), *ShiftCAmt, AndCMask); + // Transform is valid and we have a new preference. + if (CanTransform && NewShiftOpc != ShiftOpc) { + SDLoc DL(N); + SDValue NewShiftOrRotate = + DAG.getNode(NewShiftOpc, DL, OpVT, ShiftOrRotate.getOperand(0), + ShiftOrRotate.getOperand(1)); + SDValue NewAndOrOp = SDValue(); + + if (NewShiftOpc == ISD::SHL || NewShiftOpc == ISD::SRL) { + APInt NewMask = + NewShiftOpc == ISD::SHL + ? APInt::getHighBitsSet(NumBits, + NumBits - ShiftCAmt->getZExtValue()) + : APInt::getLowBitsSet(NumBits, + NumBits - ShiftCAmt->getZExtValue()); + NewAndOrOp = + DAG.getNode(ISD::AND, DL, OpVT, ShiftOrRotate.getOperand(0), + DAG.getConstant(NewMask, DL, OpVT)); + } else { + NewAndOrOp = ShiftOrRotate.getOperand(0); + } + + return DAG.getSetCC(DL, VT, NewAndOrOp, NewShiftOrRotate, Cond); + } + } + } + } + return SDValue(); } SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) { @@ -12510,7 +12826,7 @@ static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0, if (BothLiveOut) // Both unextended and extended values are live out. There had better be // a good reason for the transformation.
- return ExtendNodes.size(); + return !ExtendNodes.empty(); } return true; } @@ -12612,7 +12928,7 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) { LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align, LN0->getMemOperand()->getFlags(), LN0->getAAInfo()); - BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::Fixed(Stride), DL); + BasePtr = DAG.getMemBasePlusOffset(BasePtr, TypeSize::getFixed(Stride), DL); Loads.push_back(SplitLoad.getValue(0)); Chains.push_back(SplitLoad.getValue(1)); @@ -12832,11 +13148,10 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner, return SDValue(N, 0); // Return N so it doesn't get rechecked! } -static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, - const TargetLowering &TLI, EVT VT, - SDNode *N, SDValue N0, - ISD::LoadExtType ExtLoadType, - ISD::NodeType ExtOpc) { +static SDValue +tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, const TargetLowering &TLI, EVT VT, + bool LegalOperations, SDNode *N, SDValue N0, + ISD::LoadExtType ExtLoadType, ISD::NodeType ExtOpc) { if (!N0.hasOneUse()) return SDValue(); @@ -12844,7 +13159,8 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) return SDValue(); - if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0))) + if ((LegalOperations || !cast<MaskedLoadSDNode>(N0)->isSimple()) && + !TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0))) return SDValue(); if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) @@ -13117,8 +13433,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return foldedExt; if (SDValue foldedExt = - tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD, - ISD::SIGN_EXTEND)) + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0, + ISD::SEXTLOAD, ISD::SIGN_EXTEND)) return foldedExt; // fold (sext (load x)) to multiple smaller sextloads. @@ -13181,9 +13497,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { return V; // fold (sext x) -> (zext x) if the sign bit is known zero. - if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && - DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0); + if (!TLI.isSExtCheaperThanZExt(N0.getValueType(), VT) && + (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && + DAG.SignBitIsZero(N0)) { + SDNodeFlags Flags; + Flags.setNonNeg(true); + return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0, Flags); + } if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N)) return NewVSel; @@ -13327,8 +13647,12 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { N0.getScalarValueSizeInBits(), std::min(Op.getScalarValueSizeInBits(), VT.getScalarSizeInBits())); - if (TruncatedBits.isSubsetOf(Known.Zero)) - return DAG.getZExtOrTrunc(Op, DL, VT); + if (TruncatedBits.isSubsetOf(Known.Zero)) { + SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, DL, VT); + DAG.salvageDebugInfo(*N0.getNode()); + + return ZExtOrTrunc; + } } // fold (zext (truncate x)) -> (and x, mask) @@ -13396,8 +13720,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return foldedExt; if (SDValue foldedExt = - tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD, - ISD::ZERO_EXTEND)) + tryToFoldExtOfMaskedLoad(DAG, TLI, VT, LegalOperations, N, N0, + ISD::ZEXTLOAD, ISD::ZERO_EXTEND)) return foldedExt; // fold (zext (load x)) to multiple smaller zextloads. 
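The new sext -> zext fold above is justified because the two extensions agree whenever the sign bit of the source is zero, which is also what the freshly attached nonneg (nneg) flag records. A quick standalone verification of that equivalence (our own illustration, not patch code):

#include <cassert>
#include <cstdint>

int main() {
  // For i8 values with a clear sign bit, sign- and zero-extension to 64 bits
  // produce identical bit patterns, so SIGN_EXTEND may be rewritten as
  // ZERO_EXTEND once SignBitIsZero proves the precondition.
  for (uint32_t X = 0; X <= 0x7F; ++X) {
    int8_t Narrow = static_cast<int8_t>(X);
    uint64_t SExt = static_cast<uint64_t>(static_cast<int64_t>(Narrow));
    uint64_t ZExt = static_cast<uint64_t>(static_cast<uint8_t>(Narrow));
    assert(SExt == ZExt);
  }
  return 0;
}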
@@ -13408,8 +13732,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (and/or/xor (load x), cst)) -> // (and/or/xor (zextload x), (zext cst)) // Unless (and (load x) cst) will match as a zextload already and has - // additional users. - if (ISD::isBitwiseLogicOp(N0.getOpcode()) && + // additional users, or the zext is already free. + if (ISD::isBitwiseLogicOp(N0.getOpcode()) && !TLI.isZExtFree(N0, VT) && isa<LoadSDNode>(N0.getOperand(0)) && N0.getOperand(1).getOpcode() == ISD::Constant && (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { @@ -14005,8 +14329,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { // The original load itself didn't wrap, so an offset within it doesn't. SDNodeFlags Flags; Flags.setNoUnsignedWrap(true); - SDValue NewPtr = DAG.getMemBasePlusOffset(LN0->getBasePtr(), - TypeSize::Fixed(PtrOff), DL, Flags); + SDValue NewPtr = DAG.getMemBasePlusOffset( + LN0->getBasePtr(), TypeSize::getFixed(PtrOff), DL, Flags); AddToWorklist(NewPtr.getNode()); SDValue Load; @@ -14316,9 +14640,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { EVT SrcVT = N0.getValueType(); bool isLE = DAG.getDataLayout().isLittleEndian(); - // noop truncate - if (SrcVT == VT) - return N0; + // trunc(undef) = undef + if (N0.isUndef()) + return DAG.getUNDEF(VT); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) @@ -14350,7 +14674,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue X = N0.getOperand(0); SDValue ExtVal = N0.getOperand(1); EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT(); - if (ExtVT.bitsLT(VT)) { + if (ExtVT.bitsLT(VT) && TLI.preferSextInRegOfTruncate(VT, SrcVT, ExtVT)) { SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal); } @@ -14448,6 +14772,16 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getBuildVector(VT, DL, TruncOps); } + // trunc (splat_vector x) -> splat_vector (trunc x) + if (N0.getOpcode() == ISD::SPLAT_VECTOR && + (!LegalTypes || TLI.isTypeLegal(VT.getScalarType())) && + (!LegalOperations || TLI.isOperationLegal(ISD::SPLAT_VECTOR, VT))) { + SDLoc DL(N); + EVT SVT = VT.getScalarType(); + return DAG.getSplatVector( + VT, DL, DAG.getNode(ISD::TRUNCATE, DL, SVT, N0->getOperand(0))); + } + // Fold a series of buildvector, bitcast, and truncate if possible. // For example fold // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to @@ -15301,7 +15635,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(FMul, CDE); // Replacing the inner FMul could cause the outer FMA to be simplified // away. - return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA; + return FMA.getOpcode() == ISD::DELETED_NODE ? 
SDValue(N, 0) : FMA; } TmpFMA = TmpFMA->getOperand(2); } @@ -15859,7 +16193,8 @@ SDValue DAGCombiner::visitVP_FADD(SDNode *N) { // FADD -> FMA combines: if (SDValue Fused = visitFADDForFMACombine<VPMatchContext>(N)) { - AddToWorklist(Fused.getNode()); + if (Fused.getOpcode() != ISD::DELETED_NODE) + AddToWorklist(Fused.getNode()); return Fused; } return SDValue(); } @@ -16051,7 +16386,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // FADD -> FMA combines: if (SDValue Fused = visitFADDForFMACombine<EmptyMatchContext>(N)) { - AddToWorklist(Fused.getNode()); + if (Fused.getOpcode() != ISD::DELETED_NODE) + AddToWorklist(Fused.getNode()); return Fused; } return SDValue(); } @@ -16168,6 +16504,112 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { return SDValue(); } +// Transform IEEE Floats: +// (fmul C, (uitofp Pow2)) +// -> (bitcast_to_FP (add (bitcast_to_INT C), Log2(Pow2) << mantissa)) +// (fdiv C, (uitofp Pow2)) +// -> (bitcast_to_FP (sub (bitcast_to_INT C), Log2(Pow2) << mantissa)) +// +// The rationale is that fmul/fdiv by a power of 2 just changes the exponent, +// so there is no need for more than an add/sub. +// +// This is valid under the following circumstances: +// 1) We are dealing with IEEE floats +// 2) C is normal +// 3) The fmul/fdiv add/sub will not go outside of min/max exponent bounds. +// TODO: Much of this could also be used for generating `ldexp` on targets that +// prefer it. +SDValue DAGCombiner::combineFMulOrFDivWithIntPow2(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue ConstOp, Pow2Op; + + std::optional<int> Mantissa; + auto GetConstAndPow2Ops = [&](unsigned ConstOpIdx) { + if (ConstOpIdx == 1 && N->getOpcode() == ISD::FDIV) + return false; + + ConstOp = peekThroughBitcasts(N->getOperand(ConstOpIdx)); + Pow2Op = N->getOperand(1 - ConstOpIdx); + if (Pow2Op.getOpcode() != ISD::UINT_TO_FP && + (Pow2Op.getOpcode() != ISD::SINT_TO_FP || + !DAG.computeKnownBits(Pow2Op).isNonNegative())) + return false; + + Pow2Op = Pow2Op.getOperand(0); + + // `Log2(Pow2Op) < Pow2Op.getScalarSizeInBits()`. + // TODO: We could use knownbits to make this bound more precise. + int MaxExpChange = Pow2Op.getValueType().getScalarSizeInBits(); + + auto IsFPConstValid = [N, MaxExpChange, &Mantissa](ConstantFPSDNode *CFP) { + if (CFP == nullptr) + return false; + + const APFloat &APF = CFP->getValueAPF(); + + // Make sure we have a normal/IEEE constant. + if (!APF.isNormal() || !APF.isIEEE()) + return false; + + // Make sure the float's exponent is within the bounds for which this + // transform produces a bitwise-equal value. + int CurExp = ilogb(APF); + // FMul by pow2 will only increase exponent. + int MinExp = + N->getOpcode() == ISD::FMUL ? CurExp : (CurExp - MaxExpChange); + // FDiv by pow2 will only decrease exponent. + int MaxExp = + N->getOpcode() == ISD::FDIV ? CurExp : (CurExp + MaxExpChange); + if (MinExp <= APFloat::semanticsMinExponent(APF.getSemantics()) || + MaxExp >= APFloat::semanticsMaxExponent(APF.getSemantics())) + return false; + + // Finally make sure we actually know the mantissa for the float type. + int ThisMantissa = APFloat::semanticsPrecision(APF.getSemantics()) - 1; + if (!Mantissa) + Mantissa = ThisMantissa; + + return *Mantissa == ThisMantissa && ThisMantissa > 0; + }; + + // TODO: We may be able to include undefs.
+ return ISD::matchUnaryFpPredicate(ConstOp, IsFPConstValid);
+ };
+
+ if (!GetConstAndPow2Ops(0) && !GetConstAndPow2Ops(1))
+ return SDValue();
+
+ if (!TLI.optimizeFMulOrFDivAsShiftAddBitcast(N, ConstOp, Pow2Op))
+ return SDValue();
+
+ // Get log2 after all other checks have taken place. This is because
+ // BuildLogBase2 may create a new node.
+ SDLoc DL(N);
+ // Get a Log2 type with the same bitwidth as the float type (VT).
+ EVT NewIntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits());
+ if (VT.isVector())
+ NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewIntVT,
+ VT.getVectorElementCount());
+
+ SDValue Log2 = BuildLogBase2(Pow2Op, DL, DAG.isKnownNeverZero(Pow2Op),
+ /*InexpensiveOnly*/ true, NewIntVT);
+ if (!Log2)
+ return SDValue();
+
+ // Perform actual transform.
+ SDValue MantissaShiftCnt =
+ DAG.getConstant(*Mantissa, DL, getShiftAmountTy(NewIntVT));
+ // TODO: Sometimes Log2 is of the form `(X + C)`. `(X + C) << C1` should
+ // fold to `(X << C1) + (C << C1)`, but that isn't always the case because
+ // of the cast. We could implement that by handling the casts here.
+ SDValue Shift = DAG.getNode(ISD::SHL, DL, NewIntVT, Log2, MantissaShiftCnt);
+ SDValue ResAsInt =
+ DAG.getNode(N->getOpcode() == ISD::FMUL ? ISD::ADD : ISD::SUB, DL,
+ NewIntVT, DAG.getBitcast(NewIntVT, ConstOp), Shift);
+ SDValue ResAsFP = DAG.getBitcast(VT, ResAsInt);
+ return ResAsFP;
+}
+
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -16308,6 +16750,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return Fused;
}
+ // Don't do `combineFMulOrFDivWithIntPow2` until after FMUL -> FMA has been
+ // able to run.
+ if (SDValue R = combineFMulOrFDivWithIntPow2(N))
+ return R;
+
return SDValue();
}
@@ -16438,6 +16885,21 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitFMAD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Constant fold FMAD.
+ if (isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1) &&
+ isa<ConstantFPSDNode>(N2))
+ return DAG.getNode(ISD::FMAD, DL, VT, N0, N1, N2);
+
+ return SDValue();
+}
+
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) @@ -16659,6 +17121,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1); } + if (SDValue R = combineFMulOrFDivWithIntPow2(N)) + return R; + return SDValue(); } @@ -17046,6 +17511,21 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { return FoldIntToFPToInt(N, DAG); } +SDValue DAGCombiner::visitXRINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (lrint|llrint undef) -> undef + if (N0.isUndef()) + return DAG.getUNDEF(VT); + + // fold (lrint|llrint c1fp) -> c1 + if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0); + + return SDValue(); +} + SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -17197,6 +17677,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { case ISD::FRINT: case ISD::FTRUNC: case ISD::FNEARBYINT: + case ISD::FROUNDEVEN: case ISD::FFLOOR: case ISD::FCEIL: return N0; @@ -17671,6 +18152,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // can be folded with this one. We should do this to avoid having to keep // a copy of the original base pointer. SmallVector<SDNode *, 16> OtherUses; + constexpr unsigned int MaxSteps = 8192; if (isa<ConstantSDNode>(Offset)) for (SDNode::use_iterator UI = BasePtr->use_begin(), UE = BasePtr->use_end(); @@ -17681,7 +18163,8 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { if (Use.getUser() == Ptr.getNode() || Use != BasePtr) continue; - if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist)) + if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist, + MaxSteps)) continue; if (Use.getUser()->getOpcode() != ISD::ADD && @@ -17714,7 +18197,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { for (SDNode *Use : Ptr->uses()) { if (Use == N) continue; - if (SDNode::hasPredecessorHelper(Use, Visited, Worklist)) + if (SDNode::hasPredecessorHelper(Use, Visited, Worklist, MaxSteps)) return false; // If Ptr may be folded in addressing mode of other use, then it's @@ -17888,12 +18371,13 @@ static SDNode *getPostIndexedLoadStoreOp(SDNode *N, bool &IsLoad, // Check for #2. SmallPtrSet<const SDNode *, 32> Visited; SmallVector<const SDNode *, 8> Worklist; + constexpr unsigned int MaxSteps = 8192; // Ptr is predecessor to both N and Op. Visited.insert(Ptr.getNode()); Worklist.push_back(N); Worklist.push_back(Op); - if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) && - !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) + if (!SDNode::hasPredecessorHelper(N, Visited, Worklist, MaxSteps) && + !SDNode::hasPredecessorHelper(Op, Visited, Worklist, MaxSteps)) return Op; } return nullptr; @@ -18070,7 +18554,7 @@ StoreSDNode *DAGCombiner::getUniqueStoreFeeding(LoadSDNode *LD, } SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { - if (OptLevel == CodeGenOpt::None || !LD->isSimple()) + if (OptLevel == CodeGenOptLevel::None || !LD->isSimple()) return SDValue(); SDValue Chain = LD->getOperand(0); int64_t Offset; @@ -18270,7 +18754,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { return V; // Try to infer better alignment information than the load already has. 
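// For example, a load from a frame index known to be 16-byte aligned may
// still carry align 4 from the IR; as long as the pointer offset is also
// suitably aligned, the alignment on the memory operand can be raised.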
- if (OptLevel != CodeGenOpt::None && LD->isUnindexed() && !LD->isAtomic()) {
+ if (OptLevel != CodeGenOptLevel::None && LD->isUnindexed() &&
+ !LD->isAtomic()) {
if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) {
if (*Alignment > LD->getAlign() &&
isAligned(*Alignment, LD->getSrcValueOffset())) {
@@ -19006,7 +19491,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue Ptr = St->getBasePtr();
if (StOffset) {
SDLoc DL(IVal);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(StOffset), DL);
+ Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(StOffset), DL);
}
++OpsNarrowed;
@@ -19132,7 +19617,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
SDValue NewPtr =
- DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(PtrOff), SDLoc(LD));
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(PtrOff), SDLoc(LD));
SDValue NewLD =
DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
@@ -19305,7 +19790,7 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
Chains.push_back(StoreNodes[i].MemNode->getChain());
}
- assert(Chains.size() > 0 && "Chain should have generated a chain");
+ assert(!Chains.empty() && "Chain should have generated a chain");
return DAG.getTokenFactor(StoreDL, Chains);
}
@@ -19381,23 +19866,24 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
for (unsigned I = 0; I != NumStores; ++I) {
StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
SDValue Val = St->getValue();
- // If constant is of the wrong type, convert it now.
+ // If constant is of the wrong type, convert it now. This comes up
+ // when one of our stores was truncating.
if (MemVT != Val.getValueType()) {
Val = peekThroughBitcasts(Val);
// Deal with constants of wrong size.
if (ElementSizeBits != Val.getValueSizeInBits()) {
- EVT IntMemVT =
- EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
- if (isa<ConstantFPSDNode>(Val)) {
+ auto *C = dyn_cast<ConstantSDNode>(Val);
+ if (!C) // Not clear how to truncate FP values.
+ // TODO: Handle truncation of build_vector constants
return false;
- }
- if (auto *C = dyn_cast<ConstantSDNode>(Val))
- Val = DAG.getConstant(C->getAPIntValue()
- .zextOrTrunc(Val.getValueSizeInBits())
- .zextOrTrunc(ElementSizeBits),
- SDLoc(C), IntMemVT);
+ EVT IntMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ Val = DAG.getConstant(C->getAPIntValue()
+ .zextOrTrunc(Val.getValueSizeInBits())
+ .zextOrTrunc(ElementSizeBits),
+ SDLoc(C), IntMemVT);
}
// Now that the value is correctly sized, bitcast it to the correct type.
Val = DAG.getBitcast(MemVT, Val);
@@ -19473,6 +19959,10 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts(
// If fp truncation is necessary, give up for now.
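// (E.g. an f64 constant cannot be narrowed to an f32 memory type without
// rounding, which would change the stored bits.)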
if (MemVT.getSizeInBits() != ElementSizeBits) return false; + } else if (ISD::isBuildVectorOfConstantSDNodes(Val.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Val.getNode())) { + // Not yet handled + return false; } else { llvm_unreachable("Invalid constant element type"); } @@ -19603,7 +20093,7 @@ void DAGCombiner::getStoreMergeCandidates( case StoreSource::Constant: if (NoTypeMatch) return false; - if (!isIntOrFPConstant(OtherBC)) + if (getStoreSource(OtherBC) != StoreSource::Constant) return false; break; case StoreSource::Extract: @@ -19825,6 +20315,8 @@ bool DAGCombiner::tryStoreMergeOfConstants( IsElementZero = C->isZero(); else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) IsElementZero = C->getConstantFPValue()->isNullValue(); + else if (ISD::isBuildVectorAllZeros(StoredVal.getNode())) + IsElementZero = true; if (IsElementZero) { if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores) FirstZeroAfterNonZero = i; @@ -20286,7 +20778,7 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, } bool DAGCombiner::mergeConsecutiveStores(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None || !EnableStoreMerging) + if (OptLevel == CodeGenOptLevel::None || !EnableStoreMerging) return false; // TODO: Extend this function to merge stores of scalable vectors. @@ -20448,8 +20940,8 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { Ptr, ST->getMemOperand()); } - if (ST->isSimple() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + if (ST->isSimple() && TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32) && + !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. @@ -20464,7 +20956,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(), ST->getOriginalAlign(), MMOFlags, AAInfo); - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), DL); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), DL); SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), ST->getOriginalAlign(), MMOFlags, AAInfo); @@ -20492,9 +20984,11 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) { SDValue Elt = Value.getOperand(1); SDValue Idx = Value.getOperand(2); - // If the element isn't byte sized then we can't compute an offset + // If the element isn't byte sized or is implicitly truncated then we can't + // compute an offset. 
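+ // (E.g. an i1 element has no whole-byte offset, and inserting an i32 value
+ // into a vector of i16 elements implicitly truncates it, so the bytes in
+ // memory would not match the inserted operand.)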
EVT EltVT = Elt.getValueType(); - if (!EltVT.isByteSized()) + if (!EltVT.isByteSized() || + EltVT != Value.getOperand(0).getValueType().getVectorElementType()) return SDValue(); auto *Ld = dyn_cast<LoadSDNode>(Value.getOperand(0)); @@ -20515,7 +21009,7 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) { EVT PtrVT = Ptr.getValueType(); SDValue Offset = - DAG.getNode(ISD::MUL, DL, PtrVT, Idx, + DAG.getNode(ISD::MUL, DL, PtrVT, DAG.getZExtOrTrunc(Idx, DL, PtrVT), DAG.getConstant(EltVT.getSizeInBits() / 8, DL, PtrVT)); SDValue NewPtr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, Offset); MachinePointerInfo PointerInfo(ST->getAddressSpace()); @@ -20524,7 +21018,7 @@ SDValue DAGCombiner::replaceStoreOfInsertLoad(StoreSDNode *ST) { // info if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) { unsigned COffset = CIdx->getSExtValue() * EltVT.getSizeInBits() / 8; - NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(COffset), DL); + NewPtr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(COffset), DL); PointerInfo = ST->getPointerInfo().getWithOffset(COffset); } @@ -20563,7 +21057,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return Chain; // Try to infer better alignment information than the store already has. - if (OptLevel != CodeGenOpt::None && ST->isUnindexed() && !ST->isAtomic()) { + if (OptLevel != CodeGenOptLevel::None && ST->isUnindexed() && + !ST->isAtomic()) { if (MaybeAlign Alignment = DAG.InferPtrAlign(Ptr)) { if (*Alignment > ST->getAlign() && isAligned(*Alignment, ST->getSrcValueOffset())) { @@ -20679,7 +21174,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { if (ST->isUnindexed() && ST->isSimple() && ST1->isUnindexed() && ST1->isSimple()) { - if (OptLevel != CodeGenOpt::None && ST1->getBasePtr() == Ptr && + if (OptLevel != CodeGenOptLevel::None && ST1->getBasePtr() == Ptr && ST1->getValue() == Value && ST->getMemoryVT() == ST1->getMemoryVT() && ST->getAddressSpace() == ST1->getAddressSpace()) { // If this is a store followed by a store with the same value to the @@ -20687,7 +21182,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return Chain; } - if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() && + if (OptLevel != CodeGenOptLevel::None && ST1->hasOneUse() && !ST1->getBasePtr().isUndef() && ST->getAddressSpace() == ST1->getAddressSpace()) { // If we consider two stores and one smaller in size is a scalable @@ -20700,7 +21195,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { TypeSize::isKnownLE(ST1->getMemoryVT().getStoreSize(), ST->getMemoryVT().getStoreSize())) { CombineTo(ST1, ST1->getChain()); - return SDValue(); + return SDValue(N, 0); } } else { const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG); @@ -20713,7 +21208,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ChainBase, ST1->getMemoryVT().getFixedSizeInBits())) { CombineTo(ST1, ST1->getChain()); - return SDValue(); + return SDValue(N, 0); } } } @@ -20850,7 +21345,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { /// } /// SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOptLevel::None) return SDValue(); // Can't change the number of memory accesses for a volatile store or break @@ -20920,7 +21415,8 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { // Lower value store. 
SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
ST->getOriginalAlign(), MMOFlags, AAInfo);
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(HalfValBitSize / 8), DL);
+ Ptr =
+ DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(HalfValBitSize / 8), DL);
// Higher value store.
SDValue St1 = DAG.getStore(
St0, DL, Hi, Ptr, ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
@@ -21687,14 +22183,15 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (DAG.isKnownNeverZero(Index))
return DAG.getUNDEF(ScalarVT);
- // Check if the result type doesn't match the inserted element type. A
- // SCALAR_TO_VECTOR may truncate the inserted element and the
- // EXTRACT_VECTOR_ELT may widen the extracted vector.
+ // Check if the result type doesn't match the inserted element type.
+ // The inserted element and extracted element may have mismatched bitwidths.
+ // As a result, EXTRACT_VECTOR_ELT may extend or truncate the extracted value.
SDValue InOp = VecOp.getOperand(0);
if (InOp.getValueType() != ScalarVT) {
assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
if (InOp.getValueType().bitsGT(ScalarVT))
return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, InOp);
return DAG.getNode(ISD::ANY_EXTEND, DL, ScalarVT, InOp);
}
return InOp;
}
@@ -21746,6 +22243,19 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
unsigned NumElts = VecVT.getVectorNumElements();
unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
+ // See if the extracted element is a constant, in which case fold it if it's
+ // a legal fp immediate.
+ if (IndexC && ScalarVT.isFloatingPoint()) {
+ APInt EltMask = APInt::getOneBitSet(NumElts, IndexC->getZExtValue());
+ KnownBits KnownElt = DAG.computeKnownBits(VecOp, EltMask);
+ if (KnownElt.isConstant()) {
+ APFloat CstFP =
+ APFloat(DAG.EVTToAPFloatSemantics(ScalarVT), KnownElt.getConstant());
+ if (TLI.isFPImmLegal(CstFP, ScalarVT))
+ return DAG.getConstantFP(CstFP, DL, ScalarVT);
+ }
+ }
+
// TODO: These transforms should not require the 'hasOneUse' restriction, but
// there are regressions on multiple targets without it. We can end up with a
// mess of scalar and vector code if we reduce only part of the DAG to scalar.
@@ -22108,12 +22618,18 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
+ EVT VT = N->getValueType(0);
+
+ // Don't run this before LegalizeTypes if VT is legal.
+ // Targets may have other preferences.
+ if (Level < AfterLegalizeTypes && TLI.isTypeLegal(VT))
+ return SDValue();
+
// Only for little endian
if (!DAG.getDataLayout().isLittleEndian())
return SDValue();
SDLoc DL(N);
- EVT VT = N->getValueType(0);
EVT OutScalarTy = VT.getScalarType();
uint64_t ScalarTypeBitsize = OutScalarTy.getSizeInBits();
@@ -23574,7 +24090,8 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG,
// Bail out if the target does not support a narrower version of the binop.
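// E.g. extracting the low v2i64 half of a v4i64 binop is only worthwhile
// if the target can handle the v2i64 binop as legal, custom, or promoted.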
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), WideNumElts / NarrowingRatio); - if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) + if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT, + LegalOperations)) return SDValue(); // If extraction is cheap, we don't need to look at the binop operands @@ -23819,7 +24336,7 @@ static SDValue foldExtractSubvectorFromShuffleVector(SDNode *N, // Profitability check: only deal with extractions from the first subvector // unless the mask becomes an identity mask. - if (!ShuffleVectorInst::isIdentityMask(NewMask) || + if (!ShuffleVectorInst::isIdentityMask(NewMask, NewMask.size()) || any_of(NewMask, [](int M) { return M < 0; })) for (auto &DemandedSubvector : DemandedSubvectors) if (DemandedSubvector.second != 0) @@ -25581,15 +26098,31 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { return N0; // If this is an insert of an extracted vector into an undef vector, we can - // just use the input to the extract. + // just use the input to the extract if the types match, and can simplify + // in some cases even if they don't. if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR && - N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT) - return N1.getOperand(0); + N1.getOperand(1) == N2) { + EVT SrcVT = N1.getOperand(0).getValueType(); + if (SrcVT == VT) + return N1.getOperand(0); + // TODO: To remove the zero check, need to adjust the offset to + // a multiple of the new src type. + if (isNullConstant(N2) && + VT.isScalableVector() == SrcVT.isScalableVector()) { + if (VT.getVectorMinNumElements() >= SrcVT.getVectorMinNumElements()) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), + VT, N0, N1.getOperand(0), N2); + else + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), + VT, N1.getOperand(0), N2); + } + } // Simplify scalar inserts into an undef vector: // insert_subvector undef, (splat X), N2 -> splat X if (N0.isUndef() && N1.getOpcode() == ISD::SPLAT_VECTOR) - return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0)); + if (DAG.isConstantValueOfAnyType(N1.getOperand(0)) || N1.hasOneUse()) + return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, N1.getOperand(0)); // If we are inserting a bitcast value into an undef, with the same // number of elements, just use the bitcast input of the extract. @@ -25633,10 +26166,11 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { N1, N2); // Eliminate an intermediate insert into an undef vector: - // insert_subvector undef, (insert_subvector undef, X, 0), N2 --> - // insert_subvector undef, X, N2 + // insert_subvector undef, (insert_subvector undef, X, 0), 0 --> + // insert_subvector undef, X, 0 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR && - N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2))) + N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)) && + isNullConstant(N2)) return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0, N1.getOperand(1), N2); @@ -25812,6 +26346,14 @@ SDValue DAGCombiner::visitVPOp(SDNode *N) { if (SDValue SD = visitVPSCATTER(N)) return SD; + if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD) + if (SDValue SD = visitVP_STRIDED_LOAD(N)) + return SD; + + if (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE) + if (SDValue SD = visitVP_STRIDED_STORE(N)) + return SD; + // VP operations in which all vector elements are disabled - either by // determining that the mask is all false or that the EVL is 0 - can be // eliminated. 
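// (For example, a vp_add whose EVL operand is a constant 0 touches no
// lanes at all, so the whole operation can simply be folded away.)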
@@ -26533,11 +27075,11 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); SDValue N2 = N->getOperand(2); - EVT VT = N->getValueType(0); SDLoc DL(N); unsigned BinOpc = N1.getOpcode(); - if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc)) + if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc) || + (N1.getResNo() != N2.getResNo())) return SDValue(); // The use checks are intentionally on SDNode because we may be dealing @@ -26554,26 +27096,29 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { // Fold select(cond, binop(x, y), binop(z, y)) // --> binop(select(cond, x, z), y) if (N1.getOperand(1) == N2.getOperand(1)) { - SDValue NewSel = - DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); + SDValue N10 = N1.getOperand(0); + SDValue N20 = N2.getOperand(0); + SDValue NewSel = DAG.getSelect(DL, N10.getValueType(), N0, N10, N20); SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1)); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); - return NewBinOp; + return SDValue(NewBinOp.getNode(), N1.getResNo()); } // Fold select(cond, binop(x, y), binop(x, z)) // --> binop(x, select(cond, y, z)) - // Second op VT might be different (e.g. shift amount type) - if (N1.getOperand(0) == N2.getOperand(0) && - VT == N1.getOperand(1).getValueType() && - VT == N2.getOperand(1).getValueType()) { - SDValue NewSel = - DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); - SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel); - NewBinOp->setFlags(N1->getFlags()); - NewBinOp->intersectFlagsWith(N2->getFlags()); - return NewBinOp; + if (N1.getOperand(0) == N2.getOperand(0)) { + SDValue N11 = N1.getOperand(1); + SDValue N21 = N2.getOperand(1); + // Second op VT might be different (e.g. shift amount type) + if (N11.getValueType() == N21.getValueType()) { + SDValue NewSel = DAG.getSelect(DL, N11.getValueType(), N0, N11, N21); + SDValue NewBinOp = + DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel); + NewBinOp->setFlags(N1->getFlags()); + NewBinOp->intersectFlagsWith(N2->getFlags()); + return SDValue(NewBinOp.getNode(), N1.getResNo()); + } } // TODO: Handle isCommutativeBinOp patterns as well? @@ -26722,8 +27267,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (ConstAndRHS && ConstAndRHS->getAPIntValue().popcount() == 1) { // Shift the tested bit over the sign bit. 
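// (E.g. for the single-bit mask 0x08 in i32: shifting left by
// countl_zero(0x08) == 28 moves bit 3 into the sign bit, and an arithmetic
// shift right by ShCt == 31 then smears it into the all-ones or all-zero
// value used to form the select result.)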
const APInt &AndMask = ConstAndRHS->getAPIntValue();
- unsigned ShCt = AndMask.getBitWidth() - 1;
- if (!TLI.shouldAvoidTransformToShift(VT, ShCt)) {
+ if (TLI.shouldFoldSelectWithSingleBitTest(VT, AndMask)) {
+ unsigned ShCt = AndMask.getBitWidth() - 1;
SDValue ShlAmt =
DAG.getConstant(AndMask.countl_zero(), SDLoc(AndLHS),
getShiftAmountTy(AndLHS.getValueType()));
@@ -26764,10 +27309,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// zext (setcc n0, n1)
if (LegalTypes) {
SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
- if (VT.bitsLT(SCC.getValueType()))
- Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
- else
- Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
+ Temp = DAG.getZExtOrTrunc(SCC, SDLoc(N2), VT);
} else {
SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
@@ -26937,10 +27479,129 @@ SDValue DAGCombiner::BuildSREMPow2(SDNode *N) {
return SDValue();
}
+// This is basically just a port of takeLog2 from InstCombineMulDivRem.cpp
+//
+// Returns the node that represents `Log2(Op)`. This may create a new node.
+// If we are unable to compute `Log2(Op)`, it returns `SDValue()`.
+//
+// All nodes will be created at `DL` and the output will be of type `VT`.
+//
+// This will only return `Log2(Op)` if we can prove `Op` is non-zero. Set
+// `AssumeNonZero` if this function should simply assume (rather than require
+// proof) that `Op` is non-zero.
+static SDValue takeInexpensiveLog2(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SDValue Op, unsigned Depth,
+ bool AssumeNonZero) {
+ assert(VT.isInteger() && "Only integer types are supported!");
+
+ auto PeekThroughCastsAndTrunc = [](SDValue V) {
+ while (true) {
+ switch (V.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ V = V.getOperand(0);
+ break;
+ default:
+ return V;
+ }
+ }
+ };
+
+ if (VT.isScalableVector())
+ return SDValue();
+
+ Op = PeekThroughCastsAndTrunc(Op);
+
+ // Helper for determining whether a value is a power-of-2 constant scalar or
+ // a vector of such elements.
+ SmallVector<APInt> Pow2Constants;
+ auto IsPowerOfTwo = [&Pow2Constants](ConstantSDNode *C) {
+ if (C->isZero() || C->isOpaque())
+ return false;
+ // TODO: We may also be able to support negative powers of 2 here.
+ if (C->getAPIntValue().isPowerOf2()) {
+ Pow2Constants.emplace_back(C->getAPIntValue());
+ return true;
+ }
+ return false;
+ };
+
+ if (ISD::matchUnaryPredicate(Op, IsPowerOfTwo)) {
+ if (!VT.isVector())
+ return DAG.getConstant(Pow2Constants.back().logBase2(), DL, VT);
+ // We need to create a build vector.
+ SmallVector<SDValue> Log2Ops;
+ for (const APInt &Pow2 : Pow2Constants)
+ Log2Ops.emplace_back(
+ DAG.getConstant(Pow2.logBase2(), DL, VT.getScalarType()));
+ return DAG.getBuildVector(VT, DL, Log2Ops);
+ }
+
+ if (Depth >= DAG.MaxRecursionDepth)
+ return SDValue();
+
+ auto CastToVT = [&](EVT NewVT, SDValue ToCast) {
+ ToCast = PeekThroughCastsAndTrunc(ToCast);
+ EVT CurVT = ToCast.getValueType();
+ if (NewVT == CurVT)
+ return ToCast;
+
+ if (NewVT.getSizeInBits() == CurVT.getSizeInBits())
+ return DAG.getBitcast(NewVT, ToCast);
+
+ return DAG.getZExtOrTrunc(ToCast, DL, NewVT);
+ };
+
+ // log2(X << Y) -> log2(X) + Y
+ if (Op.getOpcode() == ISD::SHL) {
+ // 1 << Y and X nuw/nsw << Y are all non-zero.
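+ // (If X is non-zero and the shift cannot wrap, no set bit is shifted
+ // out, so log2(X << Y) == log2(X) + Y; e.g. log2(8 << 2) == 3 + 2 == 5.)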
+ if (AssumeNonZero || Op->getFlags().hasNoUnsignedWrap() ||
+ Op->getFlags().hasNoSignedWrap() || isOneConstant(Op.getOperand(0)))
+ if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0),
+ Depth + 1, AssumeNonZero))
+ return DAG.getNode(ISD::ADD, DL, VT, LogX,
+ CastToVT(VT, Op.getOperand(1)));
+ }
+
+ // c ? X : Y -> c ? Log2(X) : Log2(Y)
+ if ((Op.getOpcode() == ISD::SELECT || Op.getOpcode() == ISD::VSELECT) &&
+ Op.hasOneUse()) {
+ if (SDValue LogX = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1),
+ Depth + 1, AssumeNonZero))
+ if (SDValue LogY = takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(2),
+ Depth + 1, AssumeNonZero))
+ return DAG.getSelect(DL, VT, Op.getOperand(0), LogX, LogY);
+ }
+
+ // log2(umin(X, Y)) -> umin(log2(X), log2(Y))
+ // log2(umax(X, Y)) -> umax(log2(X), log2(Y))
+ if ((Op.getOpcode() == ISD::UMIN || Op.getOpcode() == ISD::UMAX) &&
+ Op.hasOneUse()) {
+ // Pass AssumeNonZero as false here. Otherwise we can hit the case where
+ // log2(umax(X, Y)) != umax(log2(X), log2(Y)) (because of overflow).
+ if (SDValue LogX =
+ takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(0), Depth + 1,
+ /*AssumeNonZero*/ false))
+ if (SDValue LogY =
+ takeInexpensiveLog2(DAG, DL, VT, Op.getOperand(1), Depth + 1,
+ /*AssumeNonZero*/ false))
+ return DAG.getNode(Op.getOpcode(), DL, VT, LogX, LogY);
+ }
+
+ return SDValue();
+}
+
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
-SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
- EVT VT = V.getValueType();
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL,
+ bool KnownNonZero, bool InexpensiveOnly,
+ std::optional<EVT> OutVT) {
+ EVT VT = OutVT ? *OutVT : V.getValueType();
+ SDValue InexpensiveLogBase2 =
+ takeInexpensiveLog2(DAG, DL, VT, V, /*Depth*/ 0, KnownNonZero);
+ if (InexpensiveLogBase2 || InexpensiveOnly || !DAG.isKnownToBeAPowerOfTwo(V))
+ return InexpensiveLogBase2;
+
SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
SDValue Base = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT);
SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
@@ -27328,7 +27989,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
}
case ISD::CopyFromReg:
- // Always forward past past CopyFromReg.
+ // Always forward past CopyFromReg.
C = C.getOperand(0);
return true;
@@ -27400,7 +28061,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
/// (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOptLevel::None)
return OldChain;
// Ops for replacing token factor.
@@ -27410,7 +28071,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
GatherAllAliases(N, OldChain, Aliases);
// If no operands then chain to entry token.
- if (Aliases.size() == 0)
+ if (Aliases.empty())
return DAG.getEntryNode();
// If a single operand then chain to it. We don't need to revisit it.
@@ -27506,7 +28167,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
}
// If we didn't find a chained store, exit.
- if (ChainedStores.size() == 0) + if (ChainedStores.empty()) return false; // Improve all chained stores (St and ChainedStores members) starting from @@ -27557,7 +28218,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { } bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { - if (OptLevel == CodeGenOpt::None) + if (OptLevel == CodeGenOptLevel::None) return false; const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG); @@ -27585,7 +28246,7 @@ bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) { /// This is the entry point for the file. void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA, - CodeGenOpt::Level OptLevel) { + CodeGenOptLevel OptLevel) { /// This is the main entry point to this class. DAGCombiner(*this, AA, OptLevel).Run(Level); } diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index f0affce7b6b8..a83129586339 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1327,6 +1327,14 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { << *DI << "\n"); return true; } + if (auto SI = FuncInfo.StaticAllocaMap.find(dyn_cast<AllocaInst>(V)); + SI != FuncInfo.StaticAllocaMap.end()) { + MachineOperand FrameIndexOp = MachineOperand::CreateFI(SI->second); + bool IsIndirect = false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect, + FrameIndexOp, Var, Expr); + return true; + } if (Register Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. if (!FuncInfo.MF->useDebugInstrRef()) { diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 1d0a03ccfcdc..1128ecfd860d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -64,11 +64,18 @@ static ISD::NodeType getPreferredExtendForValue(const Instruction *I) { // can be exposed. ISD::NodeType ExtendKind = ISD::ANY_EXTEND; unsigned NumOfSigned = 0, NumOfUnsigned = 0; - for (const User *U : I->users()) { - if (const auto *CI = dyn_cast<CmpInst>(U)) { + for (const Use &U : I->uses()) { + if (const auto *CI = dyn_cast<CmpInst>(U.getUser())) { NumOfSigned += CI->isSigned(); NumOfUnsigned += CI->isUnsigned(); } + if (const auto *CallI = dyn_cast<CallBase>(U.getUser())) { + if (!CallI->isArgOperand(&U)) + continue; + unsigned ArgNo = CallI->getArgOperandNo(&U); + NumOfUnsigned += CallI->paramHasAttr(ArgNo, Attribute::ZExt); + NumOfSigned += CallI->paramHasAttr(ArgNo, Attribute::SExt); + } } if (NumOfSigned > NumOfUnsigned) ExtendKind = ISD::SIGN_EXTEND; diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 4e7895c0b3cf..a27febe15db8 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1311,15 +1311,15 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const InlineAsm::Flag F(Flags); + const unsigned NumVals = F.getNumOperandRegisters(); GroupIdx.push_back(MIB->getNumOperands()); MIB.addImm(Flags); ++i; // Skip the ID value. 
- switch (InlineAsm::getKind(Flags)) { - default: llvm_unreachable("Bad flags!"); - case InlineAsm::Kind_RegDef: + switch (F.getKind()) { + case InlineAsm::Kind::RegDef: for (unsigned j = 0; j != NumVals; ++j, ++i) { Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. @@ -1328,8 +1328,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, MIB.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical())); } break; - case InlineAsm::Kind_RegDefEarlyClobber: - case InlineAsm::Kind_Clobber: + case InlineAsm::Kind::RegDefEarlyClobber: + case InlineAsm::Kind::Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | @@ -1337,9 +1337,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, ECRegs.push_back(Reg); } break; - case InlineAsm::Kind_RegUse: // Use of register. - case InlineAsm::Kind_Imm: // Immediate. - case InlineAsm::Kind_Mem: // Non-function addressing mode. + case InlineAsm::Kind::RegUse: // Use of register. + case InlineAsm::Kind::Imm: // Immediate. + case InlineAsm::Kind::Mem: // Non-function addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) @@ -1347,9 +1347,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /*IsDebug=*/false, IsClone, IsCloned); // Manually set isTied bits. - if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { - unsigned DefGroup = 0; - if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { + if (F.isRegUseKind()) { + unsigned DefGroup; + if (F.isUseOperandTiedToDef(DefGroup)) { unsigned DefIdx = GroupIdx[DefGroup] + 1; unsigned UseIdx = GroupIdx.back() + 1; for (unsigned j = 0; j != NumVals; ++j) @@ -1357,7 +1357,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } } break; - case InlineAsm::Kind_Func: // Function addressing mode. + case InlineAsm::Kind::Func: // Function addressing mode. 
for (unsigned j = 0; j != NumVals; ++j, ++i) { SDValue Op = Node->getOperand(i); AddOperand(MIB, Op, 0, nullptr, VRBaseMap, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 61fc31715d71..5e1f9fbcdde0 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" @@ -324,7 +325,8 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && TLI.ShouldShrinkFPConstant(OrigVT)) { Type *SType = SVT.getTypeForEVT(*DAG.getContext()); - LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); + LLVMC = cast<ConstantFP>(ConstantFoldCastOperand( + Instruction::FPTrunc, LLVMC, SType, DAG.getDataLayout())); VT = SVT; Extend = true; } @@ -459,7 +461,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { ST->getOriginalAlign(), MMOFlags, AAInfo); } - if (CFP->getValueType(0) == MVT::f64) { + if (CFP->getValueType(0) == MVT::f64 && + !TLI.isFPImmLegal(CFP->getValueAPF(), MVT::f64)) { // If this target supports 64-bit registers, do a single 64-bit store. if (TLI.isTypeLegal(MVT::i64)) { SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). @@ -480,7 +483,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), ST->getOriginalAlign(), MMOFlags, AAInfo); - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(4), dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(4), dl); Hi = DAG.getStore(Chain, dl, Hi, Ptr, ST->getPointerInfo().getWithOffset(4), ST->getOriginalAlign(), MMOFlags, AAInfo); @@ -589,7 +592,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Store the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); + Ptr = + DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl); Hi = DAG.getNode( ISD::SRL, dl, Value.getValueType(), Value, DAG.getConstant(RoundWidth, dl, @@ -802,7 +806,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. IncrementSize = RoundWidth / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); + Ptr = + DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl); Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); @@ -830,7 +835,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { // Load the remaining ExtraWidth bits. 
IncrementSize = RoundWidth / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); + Ptr = + DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, LD->getOriginalAlign(), MMOFlags, AAInfo); @@ -1007,6 +1013,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); break; case ISD::SET_FPENV: + case ISD::SET_FPMODE: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(1).getValueType()); break; @@ -1042,7 +1049,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } case ISD::ATOMIC_STORE: Action = TLI.getOperationAction(Node->getOpcode(), - Node->getOperand(2).getValueType()); + Node->getOperand(1).getValueType()); break; case ISD::SELECT_CC: case ISD::STRICT_FSETCC: @@ -1518,7 +1525,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { unsigned Offset = TypeByteSize*i; - SDValue Idx = DAG.getMemBasePlusOffset(FIPtr, TypeSize::Fixed(Offset), dl); + SDValue Idx = + DAG.getMemBasePlusOffset(FIPtr, TypeSize::getFixed(Offset), dl); if (Truncate) Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, @@ -1580,7 +1588,7 @@ void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, // Advance the pointer so that the loaded byte will contain the sign bit. unsigned ByteOffset = (NumBits / 8) - 1; IntPtr = - DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(ByteOffset), DL); + DAG.getMemBasePlusOffset(StackPtr, TypeSize::getFixed(ByteOffset), DL); State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, ByteOffset); } @@ -2250,7 +2258,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, // Also pass the return address of the remainder. SDValue FIPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = FIPtr; - Entry.Ty = RetTy->getPointerTo(); + Entry.Ty = PointerType::getUnqual(RetTy->getContext()); Entry.IsSExt = isSigned; Entry.IsZExt = !isSigned; Args.push_back(Entry); @@ -2341,7 +2349,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, // Pass the return address of sin. SDValue SinPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = SinPtr; - Entry.Ty = RetTy->getPointerTo(); + Entry.Ty = PointerType::getUnqual(RetTy->getContext()); Entry.IsSExt = false; Entry.IsZExt = false; Args.push_back(Entry); @@ -2349,7 +2357,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, // Also pass the return address of the cos. SDValue CosPtr = DAG.CreateStackTemporary(RetVT); Entry.Node = CosPtr; - Entry.Ty = RetTy->getPointerTo(); + Entry.Ty = PointerType::getUnqual(RetTy->getContext()); Entry.IsSExt = false; Entry.IsZExt = false; Args.push_back(Entry); @@ -2649,7 +2657,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(SDNode *Node, SDValue Store1 = DAG.getStore(MemChain, dl, Lo, StackSlot, MachinePointerInfo()); // Store the hi of the constructed double. - SDValue HiPtr = DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl); + SDValue HiPtr = + DAG.getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), dl); SDValue Store2 = DAG.getStore(MemChain, dl, Hi, HiPtr, MachinePointerInfo()); MemChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); @@ -3079,11 +3088,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::ATOMIC_STORE: { // There is no libcall for atomic store; fake it with ATOMIC_SWAP. 
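// (ATOMIC_SWAP stores the new value and returns the old memory contents;
// the loaded result is simply discarded and only the chain result,
// Swap.getValue(1), is kept.)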
- SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, - cast<AtomicSDNode>(Node)->getMemoryVT(), - Node->getOperand(0), - Node->getOperand(1), Node->getOperand(2), - cast<AtomicSDNode>(Node)->getMemOperand()); + SDValue Swap = DAG.getAtomic( + ISD::ATOMIC_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), Node->getOperand(2), Node->getOperand(1), + cast<AtomicSDNode>(Node)->getMemOperand()); Results.push_back(Swap.getValue(1)); break; } @@ -3133,6 +3141,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Res.getValue(1)); break; } + case ISD::ATOMIC_LOAD_SUB: { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue RHS = Node->getOperand(2); + AtomicSDNode *AN = cast<AtomicSDNode>(Node); + if (RHS->getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(RHS->getOperand(1))->getVT() == AN->getMemoryVT()) + RHS = RHS->getOperand(0); + SDValue NewRHS = + DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS); + SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, AN->getMemoryVT(), + Node->getOperand(0), Node->getOperand(1), + NewRHS, AN->getMemOperand()); + Results.push_back(Res); + Results.push_back(Res.getValue(1)); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -3333,7 +3358,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(DAG.expandVACopy(Node)); break; case ISD::EXTRACT_VECTOR_ELT: - if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + if (Node->getOperand(0).getValueType().getVectorElementCount().isScalar()) // This must be an access of the only element. Return it. Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Node->getOperand(0)); @@ -3904,6 +3929,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Chain = Node->getOperand(0); SDValue Table = Node->getOperand(1); SDValue Index = Node->getOperand(2); + int JTI = cast<JumpTableSDNode>(Table.getNode())->getIndex(); const DataLayout &TD = DAG.getDataLayout(); EVT PTy = TLI.getPointerTy(TD); @@ -3938,7 +3964,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.getPICJumpTableRelocBase(Table, DAG)); } - Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, DAG); + Tmp1 = TLI.expandIndirectJTBranch(dl, LD.getValue(1), Addr, JTI, DAG); Results.push_back(Tmp1); break; } @@ -4418,6 +4444,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128, Results); break; + case ISD::FEXP10: + ExpandFPLibCall(Node, RTLIB::EXP10_F32, RTLIB::EXP10_F64, RTLIB::EXP10_F80, + RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128, Results); + break; case ISD::FTRUNC: case ISD::STRICT_FTRUNC: ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, @@ -4820,6 +4850,46 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { DAG.makeStateFunctionCall(RTLIB::FESETENV, EnvPtr, Chain, dl)); break; } + case ISD::GET_FPMODE: { + // Call fegetmode, which saves control modes into a stack slot. Then load + // the value to return from the stack. 
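+ // (This mirrors the C-level sequence `femode_t M; fegetmode(&M);`
+ // followed by a load of M from the slot.)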
+ EVT ModeVT = Node->getValueType(0); + SDValue StackPtr = DAG.CreateStackTemporary(ModeVT); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + SDValue Chain = DAG.makeStateFunctionCall(RTLIB::FEGETMODE, StackPtr, + Node->getOperand(0), dl); + SDValue LdInst = DAG.getLoad( + ModeVT, dl, Chain, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); + Results.push_back(LdInst); + Results.push_back(LdInst.getValue(1)); + break; + } + case ISD::SET_FPMODE: { + // Move control modes to stack slot and then call fesetmode with the pointer + // to the slot as argument. + SDValue Mode = Node->getOperand(1); + EVT ModeVT = Mode.getValueType(); + SDValue StackPtr = DAG.CreateStackTemporary(ModeVT); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + SDValue StInst = DAG.getStore( + Node->getOperand(0), dl, Mode, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI)); + Results.push_back( + DAG.makeStateFunctionCall(RTLIB::FESETMODE, StackPtr, StInst, dl)); + break; + } + case ISD::RESET_FPMODE: { + // It is legalized to a call 'fesetmode(FE_DFL_MODE)'. On most targets + // FE_DFL_MODE is defined as '((const femode_t *) -1)' in glibc. If not, the + // target must provide custom lowering. + const DataLayout &DL = DAG.getDataLayout(); + EVT PtrTy = TLI.getPointerTy(DL); + SDValue Mode = DAG.getConstant(-1LL, dl, PtrTy); + Results.push_back(DAG.makeStateFunctionCall(RTLIB::FESETMODE, Mode, + Node->getOperand(0), dl)); + break; + } } // Replace the original node with the legalized result. @@ -4961,6 +5031,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::SREM: case ISD::UDIV: case ISD::UREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: case ISD::AND: case ISD::OR: case ISD::XOR: { @@ -4977,12 +5051,21 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; case ISD::SDIV: case ISD::SREM: + case ISD::SMIN: + case ISD::SMAX: ExtOp = ISD::SIGN_EXTEND; break; case ISD::UDIV: case ISD::UREM: ExtOp = ISD::ZERO_EXTEND; break; + case ISD::UMIN: + case ISD::UMAX: + if (TLI.isSExtCheaperThanZExt(OVT, NVT)) + ExtOp = ISD::SIGN_EXTEND; + else + ExtOp = ISD::ZERO_EXTEND; + break; } TruncOp = ISD::TRUNCATE; } @@ -5104,7 +5187,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { unsigned ExtOp = ISD::FP_EXTEND; if (NVT.isInteger()) { ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); - ExtOp = isSignedIntSetCC(CCCode) ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + if (isSignedIntSetCC(CCCode) || + TLI.isSExtCheaperThanZExt(Node->getOperand(0).getValueType(), NVT)) + ExtOp = ISD::SIGN_EXTEND; + else + ExtOp = ISD::ZERO_EXTEND; } if (Node->isStrictFPOpcode()) { SDValue InChain = Node->getOperand(0); @@ -5261,6 +5348,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FABS: case ISD::FEXP: case ISD::FEXP2: + case ISD::FEXP10: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); Results.push_back( @@ -5459,6 +5547,23 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back(NewAtomic.getValue(1)); break; } + case ISD::SPLAT_VECTOR: { + SDValue Scalar = Node->getOperand(0); + MVT ScalarType = Scalar.getSimpleValueType(); + MVT NewScalarType = NVT.getVectorElementType(); + if (ScalarType.isInteger()) { + Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NewScalarType, Scalar); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2)); + break; + } + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NewScalarType, Scalar); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); + break; + } } // Replace the original node with the legalized result. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 7e035d21ef71..c4605a6b9598 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -49,8 +49,7 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, //===----------------------------------------------------------------------===// void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { - LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG)); SDValue R = SDValue(); switch (N->getOpcode()) { @@ -88,6 +87,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; case ISD::STRICT_FEXP2: case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FEXP10: R = SoftenFloatRes_FEXP10(N); break; case ISD::STRICT_FFLOOR: case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; case ISD::STRICT_FLOG: @@ -414,6 +414,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_PPCF128)); } +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP10(SDNode *N) { + return SoftenFloatRes_Unary( + N, + GetFPLibCall(N->getValueType(0), RTLIB::EXP10_F32, RTLIB::EXP10_F64, + RTLIB::EXP10_F80, RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128)); +} + SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { return SoftenFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, @@ -890,8 +897,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VECREDUCE_SEQ(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { - LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG)); SDValue Res = SDValue(); switch (N->getOpcode()) { @@ -1257,7 +1263,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { /// have invalid operands 
or may have other results that need promotion, we just /// know that (at least) one result needs expansion. void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { - LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG)); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -1305,6 +1311,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; case ISD::STRICT_FEXP2: case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::FEXP10: ExpandFloatRes_FEXP10(N, Lo, Hi); break; case ISD::STRICT_FFLOOR: case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; case ISD::STRICT_FLOG: @@ -1500,6 +1507,15 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, RTLIB::EXP2_PPCF128), Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FEXP10(SDNode *N, SDValue &Lo, + SDValue &Hi) { + ExpandFloatRes_Unary(N, + GetFPLibCall(N->getValueType(0), RTLIB::EXP10_F32, + RTLIB::EXP10_F64, RTLIB::EXP10_F80, + RTLIB::EXP10_F128, RTLIB::EXP10_PPCF128), + Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, SDValue &Lo, SDValue &Hi) { ExpandFloatRes_Unary(N, GetFPLibCall(N->getValueType(0), @@ -1852,7 +1868,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, /// types of the node are known to be legal, but other operands of the node may /// need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { - LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG)); SDValue Res = SDValue(); // See if the target wants to custom expand this node. @@ -2166,8 +2182,7 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { } bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { - LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG)); SDValue R = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { @@ -2180,6 +2195,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { // to use the promoted float operand. Nodes that produce at least one // promotion-requiring floating point result have their operands legalized as // a part of PromoteFloatResult. 
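// (E.g. an illegal f16 operand of FP_TO_SINT is first extended to the type
// f16 promotes to, typically f32, and the node is rebuilt below with the
// promoted operand.)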
+ // clang-format off switch (N->getOpcode()) { default: #ifndef NDEBUG @@ -2191,7 +2207,9 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break; case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break; + case ISD::FP_TO_UINT: + case ISD::LRINT: + case ISD::LLRINT: R = PromoteFloatOp_UnaryOp(N, OpNo); break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: R = PromoteFloatOp_FP_TO_XINT_SAT(N, OpNo); break; @@ -2200,6 +2218,7 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; } + // clang-format on if (R.getNode()) ReplaceValueWith(SDValue(N, 0), R); @@ -2233,7 +2252,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { } // Convert the promoted float value to the desired integer type -SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) { +SDValue DAGTypeLegalizer::PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo) { SDValue Op = GetPromotedFloat(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); } @@ -2305,8 +2324,7 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { - LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG)); SDValue R = SDValue(); // See if the target wants to custom expand this node. @@ -2340,6 +2358,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: + case ISD::FEXP10: case ISD::FFLOOR: case ISD::FLOG: case ISD::FLOG2: @@ -2688,7 +2707,7 @@ SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) { void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { LLVM_DEBUG(dbgs() << "Soft promote half result " << ResNo << ": "; - N->dump(&DAG); dbgs() << "\n"); + N->dump(&DAG)); SDValue R = SDValue(); // See if the target wants to custom expand this node. @@ -2721,6 +2740,7 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: + case ISD::FEXP10: case ISD::FFLOOR: case ISD::FLOG: case ISD::FLOG2: @@ -2754,6 +2774,8 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: case ISD::FLDEXP: R = SoftPromoteHalfRes_ExpOp(N); break; + case ISD::FFREXP: R = SoftPromoteHalfRes_FFREXP(N); break; + case ISD::LOAD: R = SoftPromoteHalfRes_LOAD(N); break; case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break; case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break; @@ -2882,6 +2904,24 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_ExpOp(SDNode *N) { return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } +SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FFREXP(SDNode *N) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); + SDLoc dl(N); + + // Promote to the larger FP type. 
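+ // (The soft-promoted half value is held as i16 bits, so it is first
+ // converted up to the wider FP type; the FFREXP fraction result is
+ // converted back below, while the integer exponent result is forwarded
+ // unchanged via ReplaceValueWith.)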
+ Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, + DAG.getVTList(NVT, N->getValueType(1)), Op); + + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + + // Convert back to FP16 as an integer. + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); +} + SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { EVT RVT = N->getValueType(0); EVT SVT = N->getOperand(0).getValueType(); @@ -2996,7 +3036,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE_SEQ(SDNode *N) { bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { LLVM_DEBUG(dbgs() << "Soft promote half operand " << OpNo << ": "; - N->dump(&DAG); dbgs() << "\n"); + N->dump(&DAG)); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index df5878fcdf2e..362fa92dd44b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -39,8 +39,7 @@ using namespace llvm; /// may also have invalid operands or may have other results that need /// expansion, we just know that (at least) one result needs promotion. void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { - LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG)); SDValue Res = SDValue(); // See if the target wants to custom expand this node. @@ -60,14 +59,21 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; + case ISD::VP_BITREVERSE: case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break; + case ISD::VP_BSWAP: case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; case ISD::Constant: Res = PromoteIntRes_Constant(N); break; + case ISD::VP_CTLZ_ZERO_UNDEF: + case ISD::VP_CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; case ISD::PARITY: + case ISD::VP_CTPOP: case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break; + case ISD::VP_CTTZ_ZERO_UNDEF: + case ISD::VP_CTTZ: case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; case ISD::EXTRACT_VECTOR_ELT: @@ -283,12 +289,22 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_FunnelShift(N); break; + case ISD::VP_FSHL: + case ISD::VP_FSHR: + Res = PromoteIntRes_VPFunnelShift(N); + break; + case ISD::IS_FPCLASS: Res = PromoteIntRes_IS_FPCLASS(N); break; case ISD::FFREXP: Res = PromoteIntRes_FFREXP(N); break; + + case ISD::LRINT: + case ISD::LLRINT: + Res = PromoteIntRes_XRINT(N); + break; } // If the result is null then the sub-method took care of registering it. 
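SoftPromoteHalfRes_FFREXP above narrows only the mantissa value back to half; the integer exponent result is forwarded unchanged via ReplaceValueWith. A scalar value-level analogue, with float/double standing in for half/float (a sketch of the data flow, not the DAG code):

    #include <cmath>

    float frexpPromoted(float X, int &Exp) {
      double Mant = std::frexp((double)X, &Exp); // FFREXP in the promoted type
      return (float)Mant; // narrow only the mantissa; Exp needs no conversion
    }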
@@ -359,7 +375,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, N->getMemOperand()); ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); - return Res.getValue(1); + return DAG.getSExtOrTrunc(Res.getValue(1), SDLoc(N), NVT); } // Op2 is used for the comparison and thus must be extended according to the @@ -516,8 +532,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), - DAG.getShiftAmountConstant(DiffBits, NVT, dl)); + SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl); + if (N->getOpcode() == ISD::BSWAP) + return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + ShAmt); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + return DAG.getNode(ISD::VP_LSHR, dl, NVT, + DAG.getNode(ISD::VP_BSWAP, dl, NVT, Op, Mask, EVL), ShAmt, + Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { @@ -537,9 +560,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - return DAG.getNode(ISD::SRL, dl, NVT, - DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), - DAG.getShiftAmountConstant(DiffBits, NVT, dl)); + SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl); + if (N->getOpcode() == ISD::BITREVERSE) + return DAG.getNode(ISD::SRL, dl, NVT, + DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), ShAmt); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + return DAG.getNode(ISD::VP_LSHR, dl, NVT, + DAG.getNode(ISD::VP_BITREVERSE, dl, NVT, Op, Mask, EVL), + ShAmt, Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { @@ -584,12 +613,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); + // Subtract off the extra leading bits in the bigger type. - return DAG.getNode( - ISD::SUB, dl, NVT, Op, - DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, - NVT)); + SDValue ExtractLeadingBits = DAG.getConstant( + NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT); + if (!N->isVPOpcode()) + return DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(N->getOpcode(), dl, NVT, Op), + ExtractLeadingBits); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + return DAG.getNode(ISD::VP_SUB, dl, NVT, + DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL), + ExtractLeadingBits, Mask, EVL); } SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { @@ -611,7 +647,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { // Zero extend to the promoted type and do the count or parity there. 
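The BSWAP/BITREVERSE and CTLZ promotions above (and their new VP forms) share one idea: perform the operation in the wider type, then compensate for the DiffBits extra bits, shifting right for the swaps and subtracting for the leading-zero count. Scalar analogues with i16 and i8 promoted to i32, using GCC/Clang builtins (illustrative only):

    #include <cstdint>

    uint16_t bswap16ViaI32(uint16_t X) {
      // BSWAP in the wider type, then SRL by DiffBits (32 - 16 = 16).
      return (uint16_t)(__builtin_bswap32((uint32_t)X) >> 16);
    }

    unsigned ctlz8ViaI32(uint8_t X) {
      // The wider count sees DiffBits (32 - 8 = 24) extra leading zeros.
      // CTLZ(0) is the bit width under the non-ZERO_UNDEF semantics used here.
      unsigned Wide = X ? (unsigned)__builtin_clz((uint32_t)X) : 32u;
      return Wide - 24u; // e.g. X == 1: clz32 == 31, and 31 - 24 == 7 == clz8(1)
    }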
SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); + if (!N->isVPOpcode()) + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, + N->getOperand(1), N->getOperand(2)); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { @@ -635,15 +674,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } } - if (N->getOpcode() == ISD::CTTZ) { + if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off // the top of the original type. auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), OVT.getScalarSizeInBits()); - Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); + if (N->getOpcode() == ISD::CTTZ) + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); + else + Op = + DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT), + N->getOperand(1), N->getOperand(2)); } - return DAG.getNode(N->getOpcode(), dl, NVT, Op); + if (!N->isVPOpcode()) + return DAG.getNode(N->getOpcode(), dl, NVT, Op); + return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1), + N->getOperand(2)); } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -740,6 +787,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); } +SDValue DAGTypeLegalizer::PromoteIntRes_XRINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -1366,6 +1419,60 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) { return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt); } +// A vp version of PromoteIntRes_FunnelShift. +SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) { + SDValue Hi = GetPromotedInteger(N->getOperand(0)); + SDValue Lo = GetPromotedInteger(N->getOperand(1)); + SDValue Amt = N->getOperand(2); + SDValue Mask = N->getOperand(3); + SDValue EVL = N->getOperand(4); + if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger) + Amt = ZExtPromotedInteger(Amt); + EVT AmtVT = Amt.getValueType(); + + SDLoc DL(N); + EVT OldVT = N->getOperand(0).getValueType(); + EVT VT = Lo.getValueType(); + unsigned Opcode = N->getOpcode(); + bool IsFSHR = Opcode == ISD::VP_FSHR; + unsigned OldBits = OldVT.getScalarSizeInBits(); + unsigned NewBits = VT.getScalarSizeInBits(); + + // Amount has to be interpreted modulo the old bit width. + Amt = DAG.getNode(ISD::VP_UREM, DL, AmtVT, Amt, + DAG.getConstant(OldBits, DL, AmtVT), Mask, EVL); + + // If the promoted type is twice the size (or more), then we use the + // traditional funnel 'double' shift codegen. This isn't necessary if the + // shift amount is constant. + // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw. + // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)). 
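A worked instance of the double-shift formulas in the comment above, with bw = 8 promoted into a 32-bit type (scalar sketch of what the code that follows builds with VP nodes):

    #include <cstdint>

    uint8_t fshl8(uint8_t X, uint8_t Y, unsigned Z) {
      Z %= 8;                                // amount is modulo the old width
      uint32_t Cat = ((uint32_t)X << 8) | Y; // (aext(x) << bw) | zext(y)
      return (uint8_t)((Cat << Z) >> 8);     // (... << (z % bw)) >> bw
    }

    uint8_t fshr8(uint8_t X, uint8_t Y, unsigned Z) {
      Z %= 8;
      uint32_t Cat = ((uint32_t)X << 8) | Y;
      return (uint8_t)(Cat >> Z);            // ... >> (z % bw)
    }

    // e.g. fshl8(0xAB, 0xCD, 4) == 0xBC, and fshr8(0xAB, 0xCD, 4) == 0xBC too.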
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) && + !TLI.isOperationLegalOrCustom(Opcode, VT)) { + SDValue HiShift = DAG.getConstant(OldBits, DL, VT); + Hi = DAG.getNode(ISD::VP_SHL, DL, VT, Hi, HiShift, Mask, EVL); + // FIXME: Replace it by vp operations. + Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT); + SDValue Res = DAG.getNode(ISD::VP_OR, DL, VT, Hi, Lo, Mask, EVL); + Res = DAG.getNode(IsFSHR ? ISD::VP_LSHR : ISD::VP_SHL, DL, VT, Res, Amt, + Mask, EVL); + if (!IsFSHR) + Res = DAG.getNode(ISD::VP_LSHR, DL, VT, Res, HiShift, Mask, EVL); + return Res; + } + + // Shift Lo up to occupy the upper bits of the promoted type. + SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT); + Lo = DAG.getNode(ISD::VP_SHL, DL, VT, Lo, ShiftOffset, Mask, EVL); + + // Increase Amount to shift the result into the lower bits of the promoted + // type. + if (IsFSHR) + Amt = DAG.getNode(ISD::VP_ADD, DL, AmtVT, Amt, ShiftOffset, Mask, EVL); + + return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt, Mask, EVL); +} + SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; @@ -1638,8 +1745,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { - LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG)); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) { LLVM_DEBUG(dbgs() << "Node has been custom lowered, done\n"); @@ -1721,8 +1827,6 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::FRAMEADDR: case ISD::RETURNADDR: Res = PromoteIntOp_FRAMERETURNADDR(N); break; - case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break; - case ISD::SMULFIX: case ISD::SMULFIXSAT: case ISD::UMULFIX: @@ -1859,9 +1963,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { - SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op1 = GetPromotedInteger(N->getOperand(1)); return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), - N->getChain(), N->getBasePtr(), Op2, N->getMemOperand()); + N->getChain(), Op1, N->getBasePtr(), N->getMemOperand()); } SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) { @@ -2236,18 +2340,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FRAMERETURNADDR(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, Op), 0); } -SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) { - assert(OpNo > 1 && "Don't know how to promote this operand!"); - // Promote the rw, locality, and cache type arguments to a supported integer - // width. - SDValue Op2 = ZExtPromotedInteger(N->getOperand(2)); - SDValue Op3 = ZExtPromotedInteger(N->getOperand(3)); - SDValue Op4 = ZExtPromotedInteger(N->getOperand(4)); - return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), - Op2, Op3, Op4), - 0); -} - SDValue DAGTypeLegalizer::PromoteIntOp_ExpOp(SDNode *N) { bool IsStrict = N->isStrictFPOpcode(); SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); @@ -2466,8 +2558,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { /// have invalid operands or may have other results that need promotion, we just /// know that (at least) one result needs expansion. void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { - LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG)); SDValue Lo, Hi; Lo = Hi = SDValue(); @@ -2512,9 +2603,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; case ISD::GET_ROUNDING:ExpandIntRes_GET_ROUNDING(N, Lo, Hi); break; case ISD::STRICT_FP_TO_SINT: - case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: - case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_XINT(N, Lo, Hi); break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break; case ISD::STRICT_LROUND: @@ -3591,43 +3682,24 @@ void DAGTypeLegalizer::ExpandIntRes_GET_ROUNDING(SDNode *N, SDValue &Lo, ReplaceValueWith(SDValue(N, 1), Chain); } -void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDLoc dl(N); - EVT VT = N->getValueType(0); - - bool IsStrict = N->isStrictFPOpcode(); - SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); - SDValue Op = N->getOperand(IsStrict ? 1 : 0); - if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) - Op = GetPromotedFloat(Op); - - if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) { - EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); - Op = GetSoftPromotedHalf(Op); - Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); - Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); - SplitInteger(Op, Lo, Hi); - return; +// Helper for producing an FP_EXTEND/STRICT_FP_EXTEND of Op. +static SDValue fpExtendHelper(SDValue Op, SDValue &Chain, bool IsStrict, EVT VT, + SDLoc DL, SelectionDAG &DAG) { + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other}, {Chain, Op}); + Chain = Op.getValue(1); + return Op; } - - RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); - std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, - CallOptions, dl, Chain); - SplitInteger(Tmp.first, Lo, Hi); - - if (IsStrict) - ReplaceValueWith(SDValue(N, 1), Tmp.second); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, Op); } -void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, +void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = N->getValueType(0); + bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || + N->getOpcode() == ISD::STRICT_FP_TO_SINT; bool IsStrict = N->isStrictFPOpcode(); SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); SDValue Op = N->getOperand(IsStrict ? 
1 : 0); @@ -3635,17 +3707,26 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, Op = GetPromotedFloat(Op); if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftPromoteHalf) { - EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); + EVT OFPVT = Op.getValueType(); + EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), OFPVT); Op = GetSoftPromotedHalf(Op); - Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); - Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op); + Op = DAG.getNode(OFPVT == MVT::f16 ? ISD::FP16_TO_FP : ISD::BF16_TO_FP, dl, + NFPVT, Op); + Op = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, VT, Op); SplitInteger(Op, Lo, Hi); return; } - RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); + if (Op.getValueType() == MVT::bf16) { + // Extend to f32 as there is no bf16 libcall. + Op = fpExtendHelper(Op, Chain, IsStrict, MVT::f32, dl, DAG); + } + + RTLIB::Libcall LC = IsSigned ? RTLIB::getFPTOSINT(Op.getValueType(), VT) + : RTLIB::getFPTOUINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-xint conversion!"); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setSExt(true); std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl, Chain); SplitInteger(Tmp.first, Lo, Hi); @@ -3673,14 +3754,9 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo, EVT VT = Op.getValueType(); if (VT == MVT::f16) { - VT = MVT::f32; // Extend to f32. - if (IsStrict) { - Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { VT, MVT::Other }, {Chain, Op}); - Chain = Op.getValue(1); - } else { - Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op); - } + VT = MVT::f32; + Op = fpExtendHelper(Op, Chain, IsStrict, VT, dl, DAG); } RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -3754,20 +3830,7 @@ void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi) { - if (N->isAtomic()) { - // It's typical to have larger CAS than atomic load instructions. - SDLoc dl(N); - EVT VT = N->getMemoryVT(); - SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); - SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue Swap = DAG.getAtomicCmpSwap( - ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, - VT, VTs, N->getOperand(0), - N->getOperand(1), Zero, Zero, N->getMemOperand()); - ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); - ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); - return; - } + assert(!N->isAtomic() && "Should have been a ATOMIC_LOAD?"); if (ISD::isNormalLoad(N)) { ExpandRes_NormalLoad(N, Lo, Hi); @@ -3822,7 +3885,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, N->getOriginalAlign(), MMOFlags, AAInfo); @@ -3846,7 +3909,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. 
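ExpandIntRes_LOAD here splits a too-wide integer load into two loads of the half-size type NVT, bumping the pointer by IncrementSize = NVT-bits / 8 for the second one. Conceptually, for an expanded i128 load this amounts to the following on a little-endian target (the big-endian path in the code swaps which half sits at the lower address; sketch only):

    #include <cstdint>
    #include <cstring>

    struct U128 { uint64_t Lo, Hi; };

    U128 loadI128Split(const unsigned char *P) {
      U128 R;
      std::memcpy(&R.Lo, P, 8);     // low half at the base pointer
      std::memcpy(&R.Hi, P + 8, 8); // high half at base + IncrementSize (64/8)
      return R;
    }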
- Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -4760,7 +4823,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // Also pass the address of the overflow check. Entry.Node = Temp; - Entry.Ty = PtrTy->getPointerTo(); + Entry.Ty = PointerType::getUnqual(PtrTy->getContext()); Entry.IsSExt = true; Entry.IsZExt = false; Args.push_back(Entry); @@ -4988,8 +5051,7 @@ void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, /// result types of the node are known to be legal, but other operands of the /// node may need promotion or expansion as well as the specified one. bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { - LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG)); SDValue Res = SDValue(); if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) @@ -5014,11 +5076,11 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; case ISD::SETCCCARRY: Res = ExpandIntOp_SETCCCARRY(N); break; case ISD::STRICT_SINT_TO_FP: - case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; + case ISD::SINT_TO_FP: + case ISD::STRICT_UINT_TO_FP: + case ISD::UINT_TO_FP: Res = ExpandIntOp_XINT_TO_FP(N); break; case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; - case ISD::STRICT_UINT_TO_FP: - case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; case ISD::SHL: case ISD::SRA: @@ -5067,16 +5129,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, GetExpandedInteger(NewRHS, RHSLo, RHSHi); if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { - if (RHSLo == RHSHi) { - if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { - if (RHSCST->isAllOnes()) { - // Equality comparison to -1. - NewLHS = DAG.getNode(ISD::AND, dl, - LHSLo.getValueType(), LHSLo, LHSHi); - NewRHS = RHSLo; - return; - } - } + if (RHSLo == RHSHi && isAllOnesConstant(RHSLo)) { + // Equality comparison to -1. + NewLHS = DAG.getNode(ISD::AND, dl, LHSLo.getValueType(), LHSLo, LHSHi); + NewRHS = RHSLo; + return; } NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); @@ -5303,14 +5360,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { return SDValue(DAG.UpdateNodeOperands(N, Lo), 0); } -SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { +SDValue DAGTypeLegalizer::ExpandIntOp_XINT_TO_FP(SDNode *N) { bool IsStrict = N->isStrictFPOpcode(); + bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP || + N->getOpcode() == ISD::STRICT_SINT_TO_FP; SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT DstVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); + RTLIB::Libcall LC = IsSigned ? 
RTLIB::getSINTTOFP(Op.getValueType(), DstVT) + : RTLIB::getUINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && - "Don't know how to expand this SINT_TO_FP!"); + "Don't know how to expand this XINT_TO_FP!"); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); std::pair<SDValue, SDValue> Tmp = @@ -5325,16 +5385,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { - if (N->isAtomic()) { - // It's typical to have larger CAS than atomic store instructions. - SDLoc dl(N); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, - N->getMemoryVT(), - N->getOperand(0), N->getOperand(2), - N->getOperand(1), - N->getMemOperand()); - return Swap.getValue(1); - } + assert(!N->isAtomic() && "Should have been a ATOMIC_STORE?"); + if (ISD::isNormalStore(N)) return ExpandOp_NormalStore(N, OpNo); @@ -5372,7 +5424,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; - Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize)); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, N->getOriginalAlign(), MMOFlags, AAInfo); @@ -5407,7 +5459,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { N->getOriginalAlign(), MMOFlags, AAInfo); // Increment the pointer to the other half. - Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize)); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), @@ -5423,34 +5475,12 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL); } -SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { - bool IsStrict = N->isStrictFPOpcode(); - SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); - SDValue Op = N->getOperand(IsStrict ? 
1 : 0); - EVT DstVT = N->getValueType(0); - RTLIB::Libcall LC = RTLIB::getUINTTOFP(Op.getValueType(), DstVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && - "Don't know how to expand this UINT_TO_FP!"); - TargetLowering::MakeLibCallOptions CallOptions; - CallOptions.setSExt(true); - std::pair<SDValue, SDValue> Tmp = - TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N), Chain); - - if (!IsStrict) - return Tmp.first; - - ReplaceValueWith(SDValue(N, 1), Tmp.second); - ReplaceValueWith(SDValue(N, 0), Tmp.first); - return SDValue(); -} - SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { SDLoc dl(N); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, - cast<AtomicSDNode>(N)->getMemoryVT(), - N->getOperand(0), - N->getOperand(1), N->getOperand(2), - cast<AtomicSDNode>(N)->getMemOperand()); + SDValue Swap = + DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast<AtomicSDNode>(N)->getMemoryVT(), + N->getOperand(0), N->getOperand(2), N->getOperand(1), + cast<AtomicSDNode>(N)->getMemOperand()); return Swap.getValue(1); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 328939e44dcb..8a93433c5e04 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -235,7 +235,7 @@ bool DAGTypeLegalizer::run() { assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); - LLVM_DEBUG(dbgs() << "Legalizing node: "; N->dump(&DAG)); + LLVM_DEBUG(dbgs() << "\nLegalizing node: "; N->dump(&DAG)); if (IgnoreNodeResults(N)) { LLVM_DEBUG(dbgs() << "Ignoring node results\n"); goto ScanOperands; @@ -390,8 +390,7 @@ ScanOperands: } if (i == NumOperands) { - LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG)); } } NodeDone: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index db8f61eee606..9d5931b44ac6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -326,6 +326,7 @@ private: SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N); SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N); + SDValue PromoteIntRes_XRINT(SDNode *N); SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); @@ -362,6 +363,7 @@ private: SDValue PromoteIntRes_ABS(SDNode *N); SDValue PromoteIntRes_Rotate(SDNode *N); SDValue PromoteIntRes_FunnelShift(SDNode *N); + SDValue PromoteIntRes_VPFunnelShift(SDNode *N); SDValue PromoteIntRes_IS_FPCLASS(SDNode *N); // Integer Operand Promotion. 
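For the merged ExpandIntOp_XINT_TO_FP above, the selected libcall is the usual compiler-rt conversion routine. For example, i128 -> double on a typical 64-bit target comes down to the following (entry-point names shown for illustration; the actual call is whatever RTLIB::getSINTTOFP / getUINTTOFP return for the type pair):

    extern "C" double __floattidf(__int128);            // signed i128 -> f64
    extern "C" double __floatuntidf(unsigned __int128); // unsigned i128 -> f64

    double fromSigned(__int128 X) { return __floattidf(X); }
    double fromUnsigned(unsigned __int128 X) { return __floatuntidf(X); }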
@@ -400,7 +402,6 @@ private: SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); SDValue PromoteIntOp_ADDSUBO_CARRY(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N); - SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_FIX(SDNode *N); SDValue PromoteIntOp_ExpOp(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); @@ -442,8 +443,7 @@ private: void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_GET_ROUNDING (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_XINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_XROUND_XRINT (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -494,10 +494,9 @@ private: SDValue ExpandIntOp_SETCC(SDNode *N); SDValue ExpandIntOp_SETCCCARRY(SDNode *N); SDValue ExpandIntOp_Shift(SDNode *N); - SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue ExpandIntOp_TRUNCATE(SDNode *N); - SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_XINT_TO_FP(SDNode *N); SDValue ExpandIntOp_RETURNADDR(SDNode *N); SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N); @@ -552,6 +551,7 @@ private: SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FEXP10(SDNode *N); SDValue SoftenFloatRes_FFLOOR(SDNode *N); SDValue SoftenFloatRes_FLOG(SDNode *N); SDValue SoftenFloatRes_FLOG2(SDNode *N); @@ -633,6 +633,7 @@ private: void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP10 (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -711,7 +712,7 @@ private: SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); - SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_UnaryOp(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_FP_TO_XINT_SAT(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); @@ -737,6 +738,7 @@ private: SDValue SoftPromoteHalfRes_FCOPYSIGN(SDNode *N); SDValue SoftPromoteHalfRes_FMAD(SDNode *N); SDValue SoftPromoteHalfRes_ExpOp(SDNode *N); + SDValue SoftPromoteHalfRes_FFREXP(SDNode *N); SDValue SoftPromoteHalfRes_FP_ROUND(SDNode *N); SDValue SoftPromoteHalfRes_LOAD(SDNode *N); SDValue SoftPromoteHalfRes_SELECT(SDNode *N); @@ -888,6 +890,7 @@ private: void SplitVecRes_VECTOR_INTERLEAVE(SDNode *N); void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi); // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. 
bool SplitVectorOperand(SDNode *N, unsigned OpNo); @@ -986,6 +989,7 @@ private: SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_Convert_StrictFP(SDNode *N); SDValue WidenVecRes_FP_TO_XINT_SAT(SDNode *N); + SDValue WidenVecRes_XRINT(SDNode *N); SDValue WidenVecRes_FCOPYSIGN(SDNode *N); SDValue WidenVecRes_IS_FPCLASS(SDNode *N); SDValue WidenVecRes_ExpOp(SDNode *N); @@ -1000,6 +1004,7 @@ private: SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 296242c00401..a55364ea2c4e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -176,7 +176,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = - DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl); + DAG.getMemBasePlusOffset(StackPtr, TypeSize::getFixed(IncrementSize), dl); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -265,7 +265,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; - Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(IncrementSize), dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(IncrementSize), dl); Hi = DAG.getLoad( NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), AAInfo); @@ -479,7 +479,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize)); Hi = DAG.getStore( Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), St->getOriginalAlign(), St->getMemOperand()->getFlags(), AAInfo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 3862fd241897..1fbd6322f9ed 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -166,6 +166,21 @@ class VectorLegalizer { /// truncated back to the original type. void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); + /// Implements vector reduce operation promotion. + /// + /// All vector operands are promoted to a vector type with larger element + /// type, and the start value is promoted to a larger scalar type. Then the + /// result is truncated back to the original scalar type. + void PromoteReduction(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + /// Implements vector setcc operation promotion. + /// + /// All vector operands are promoted to a vector type with larger element + /// type. 
+ void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results); + public: VectorLegalizer(SelectionDAG& dag) : DAG(dag), TLI(dag.getTargetLoweringInfo()) {} @@ -385,9 +400,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FLOG10: case ISD::FEXP: case ISD::FEXP2: + case ISD::FEXP10: case ISD::FCEIL: case ISD::FTRUNC: case ISD::FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FNEARBYINT: case ISD::FROUND: case ISD::FROUNDEVEN: @@ -551,6 +569,116 @@ bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, return true; } +void VectorLegalizer::PromoteReduction(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT VecVT = Node->getOperand(1).getSimpleValueType(); + MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); + MVT ScalarVT = Node->getSimpleValueType(0); + MVT NewScalarVT = NewVecVT.getVectorElementType(); + + SDLoc DL(Node); + SmallVector<SDValue, 4> Operands(Node->getNumOperands()); + + // promote the initial value. + if (Node->getOperand(0).getValueType().isFloatingPoint()) + Operands[0] = + DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0)); + else + Operands[0] = + DAG.getNode(ISD::ANY_EXTEND, DL, NewScalarVT, Node->getOperand(0)); + + for (unsigned j = 1; j != Node->getNumOperands(); ++j) + if (Node->getOperand(j).getValueType().isVector() && + !(ISD::isVPOpcode(Node->getOpcode()) && + ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand. + // promote the vector operand. + if (Node->getOperand(j).getValueType().isFloatingPoint()) + Operands[j] = + DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j)); + else + Operands[j] = + DAG.getNode(ISD::ANY_EXTEND, DL, NewVecVT, Node->getOperand(j)); + else + Operands[j] = Node->getOperand(j); // Skip VL operand. + + SDValue Res = DAG.getNode(Node->getOpcode(), DL, NewScalarVT, Operands, + Node->getFlags()); + + if (ScalarVT.isFloatingPoint()) + Res = DAG.getNode(ISD::FP_ROUND, DL, ScalarVT, Res, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)); + else + Res = DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, Res); + + Results.push_back(Res); +} + +void VectorLegalizer::PromoteSETCC(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT VecVT = Node->getOperand(0).getSimpleValueType(); + MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); + + unsigned ExtOp = VecVT.isFloatingPoint() ? 
ISD::FP_EXTEND : ISD::ANY_EXTEND; + + SDLoc DL(Node); + SmallVector<SDValue, 5> Operands(Node->getNumOperands()); + + Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0)); + Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1)); + Operands[2] = Node->getOperand(2); + + if (Node->getOpcode() == ISD::VP_SETCC) { + Operands[3] = Node->getOperand(3); // mask + Operands[4] = Node->getOperand(4); // evl + } + + SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0), + Operands, Node->getFlags()); + + Results.push_back(Res); +} + +void VectorLegalizer::PromoteSTRICT(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + MVT VecVT = Node->getOperand(1).getSimpleValueType(); + MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); + + assert(VecVT.isFloatingPoint()); + + SDLoc DL(Node); + SmallVector<SDValue, 5> Operands(Node->getNumOperands()); + SmallVector<SDValue, 2> Chains; + + for (unsigned j = 1; j != Node->getNumOperands(); ++j) + if (Node->getOperand(j).getValueType().isVector() && + !(ISD::isVPOpcode(Node->getOpcode()) && + ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand. + { + // promote the vector operand. + SDValue Ext = + DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other}, + {Node->getOperand(0), Node->getOperand(j)}); + Operands[j] = Ext.getValue(0); + Chains.push_back(Ext.getValue(1)); + } else + Operands[j] = Node->getOperand(j); // Skip no vector operand. + + SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1)); + + Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + + SDValue Res = + DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags()); + + SDValue Round = + DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other}, + {Res.getValue(1), Res.getValue(0), + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)}); + + Results.push_back(Round.getValue(0)); + Results.push_back(Round.getValue(1)); +} + void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // For a few operations there is a specific concept for promotion based on // the operand's type. @@ -569,6 +697,36 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // Promote the operation by extending the operand. PromoteFP_TO_INT(Node, Results); return; + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + case ISD::VP_REDUCE_FADD: + case ISD::VP_REDUCE_FMUL: + case ISD::VP_REDUCE_FMAX: + case ISD::VP_REDUCE_FMIN: + case ISD::VP_REDUCE_SEQ_FADD: + // Promote the operation by extending the operand. + PromoteReduction(Node, Results); + return; + case ISD::VP_SETCC: + case ISD::SETCC: + // Promote the operation by extending the operand. 
+ PromoteSETCC(Node, Results); + return; + case ISD::STRICT_FADD: + case ISD::STRICT_FSUB: + case ISD::STRICT_FMUL: + case ISD::STRICT_FDIV: + case ISD::STRICT_FSQRT: + case ISD::STRICT_FMA: + PromoteSTRICT(Node, Results); + return; case ISD::FP_ROUND: case ISD::FP_EXTEND: // These operations are used to do promotion so they can't be promoted @@ -589,7 +747,10 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { SmallVector<SDValue, 4> Operands(Node->getNumOperands()); for (unsigned j = 0; j != Node->getNumOperands(); ++j) { - if (Node->getOperand(j).getValueType().isVector()) + // Do not promote the mask operand of a VP OP. + bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) && + ISD::getVPMaskIdx(Node->getOpcode()) == j; + if (Node->getOperand(j).getValueType().isVector() && !SkipPromote) if (Node->getOperand(j) .getValueType() .getVectorElementType() diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8c117c1c74dc..66461b26468f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -38,8 +38,8 @@ using namespace llvm; //===----------------------------------------------------------------------===// void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { - LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; + N->dump(&DAG)); SDValue R = SDValue(); switch (N->getOpcode()) { @@ -88,6 +88,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: + case ISD::FEXP10: case ISD::FFLOOR: case ISD::FLOG: case ISD::FLOG10: @@ -100,6 +101,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FSIN: @@ -656,8 +659,8 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) { //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { - LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; + N->dump(&DAG)); SDValue Res = SDValue(); switch (N->getOpcode()) { @@ -680,6 +683,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::LRINT: + case ISD::LLRINT: Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::STRICT_SINT_TO_FP: @@ -965,7 +970,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) { /// invalid operands or may have other results that need legalization, we just /// know that (at least) one result needs vector splitting. void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { - LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG)); SDValue Lo, Hi; // See if the target wants to custom expand this node. 
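PromoteSTRICT above extends every FP vector operand (collecting the extend out-chains into a TokenFactor), performs the strict operation in the promoted type, and rounds once at the end. A scalar value-level analogue, with float/double standing in for the motivating f16/f32 case; for add, sub, mul, div, and sqrt these width pairs satisfy p' >= 2p + 2, so computing wide and rounding once still yields the correctly rounded narrow result (sketch; the chain bookkeeping has no scalar equivalent):

    float strictFAddPromoted(float A, float B) {
      double WA = (double)A; // STRICT_FP_EXTEND (chain omitted)
      double WB = (double)B; // STRICT_FP_EXTEND
      double R = WA + WB;    // STRICT_FADD in the promoted type
      return (float)R;       // STRICT_FP_ROUND back to the original type
    }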
@@ -1075,6 +1080,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: + case ISD::FEXP10: case ISD::FFLOOR: case ISD::VP_FFLOOR: case ISD::FLOG: @@ -1095,6 +1101,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FP_TO_UINT: case ISD::FRINT: case ISD::VP_FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FROUND: case ISD::VP_FROUND: case ISD::FROUNDEVEN: @@ -1201,6 +1209,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIVFIXSAT: SplitVecRes_FIX(N, Lo, Hi); break; + case ISD::EXPERIMENTAL_VP_REVERSE: + SplitVecRes_VP_REVERSE(N, Lo, Hi); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1228,7 +1239,7 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, } else { MPI = N->getPointerInfo().getWithOffset(IncrementSize); // Increment the pointer to the other half. - Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::Fixed(IncrementSize)); + Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::getFixed(IncrementSize)); } } @@ -2849,6 +2860,56 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); } +void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT VT = N->getValueType(0); + SDValue Val = N->getOperand(0); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + SDLoc DL(N); + + // Fallback to VP_STRIDED_STORE to stack followed by VP_LOAD. + Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false); + + EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + VT.getVectorElementCount()); + SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment); + EVT PtrVT = StackPtr.getValueType(); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand( + PtrInfo, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, + Alignment); + MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand( + PtrInfo, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, + Alignment); + + unsigned EltWidth = VT.getScalarSizeInBits() / 8; + SDValue NumElemMinus1 = + DAG.getNode(ISD::SUB, DL, PtrVT, DAG.getZExtOrTrunc(EVL, DL, PtrVT), + DAG.getConstant(1, DL, PtrVT)); + SDValue StartOffset = DAG.getNode(ISD::MUL, DL, PtrVT, NumElemMinus1, + DAG.getConstant(EltWidth, DL, PtrVT)); + SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, StartOffset); + SDValue Stride = DAG.getConstant(-(int64_t)EltWidth, DL, PtrVT); + + SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT); + SDValue Store = DAG.getStridedStoreVP(DAG.getEntryNode(), DL, Val, StorePtr, + DAG.getUNDEF(PtrVT), Stride, TrueMask, + EVL, MemVT, StoreMMO, ISD::UNINDEXED); + + SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO); + + auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VT); + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load, + DAG.getVectorIdxConstant(0, DL)); + Hi = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load, + DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL)); +} + void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) { SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi; @@ -2889,7 +2950,7 @@ 
void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) { /// the node are known to be legal, but other operands of the node may need /// legalization as well as the specified one. bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { - LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG); dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG)); SDValue Res = SDValue(); // See if the target wants to custom split this node. @@ -2972,6 +3033,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: case ISD::FTRUNC: + case ISD::LRINT: + case ISD::LLRINT: Res = SplitVecOp_UnaryOp(N); break; case ISD::FLDEXP: @@ -3973,8 +4036,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) { //===----------------------------------------------------------------------===// void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { - LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG)); // See if the target wants to custom widen this node. if (CustomWidenLowerNode(N, N->getValueType(ResNo))) @@ -4195,11 +4257,17 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_FP_TO_XINT_SAT(N); break; + case ISD::LRINT: + case ISD::LLRINT: + Res = WidenVecRes_XRINT(N); + break; + case ISD::FABS: case ISD::FCEIL: case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: + case ISD::FEXP10: case ISD::FFLOOR: case ISD::FLOG: case ISD::FLOG10: @@ -4779,6 +4847,27 @@ SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1)); } +SDValue DAGTypeLegalizer::WidenVecRes_XRINT(SDNode *N) { + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ElementCount WidenNumElts = WidenVT.getVectorElementCount(); + + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + + // Also widen the input. + if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) { + Src = GetWidenedVector(Src); + SrcVT = Src.getValueType(); + } + + // Input and output not widened to the same size, give up. + if (WidenNumElts != SrcVT.getVectorElementCount()) + return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue()); + + return DAG.getNode(N->getOpcode(), dl, WidenVT, Src); +} + SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { SDValue InOp = N->getOperand(1); SDLoc DL(N); @@ -5919,8 +6008,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { // Widen Vector Operand //===----------------------------------------------------------------------===// bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { - LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG); - dbgs() << "\n"); + LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG)); SDValue Res = SDValue(); // See if the target wants to custom widen this node. 
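WidenVecRes_XRINT above only emits a single wide node when the widened source and the widened result end up with the same element count; otherwise it falls back to UnrollVectorOp. The unrolled form amounts to one scalar rounding call per lane (illustrative):

    #include <array>
    #include <cmath>
    #include <cstddef>

    std::array<long, 4> lrintPerElement(const std::array<float, 4> &V) {
      std::array<long, 4> R;
      for (std::size_t I = 0; I < V.size(); ++I)
        R[I] = std::lrint(V[I]); // one scalar LRINT per element, like UnrollVectorOp
      return R;
    }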
@@ -5946,6 +6034,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXPERIMENTAL_VP_STRIDED_STORE: Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo); break; + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + Res = WidenVecOp_EXTEND_VECTOR_INREG(N); + break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; @@ -5955,7 +6048,11 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; case ISD::FLDEXP: - case ISD::FCOPYSIGN: Res = WidenVecOp_UnrollVectorOp(N); break; + case ISD::FCOPYSIGN: + case ISD::LRINT: + case ISD::LLRINT: + Res = WidenVecOp_UnrollVectorOp(N); + break; case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break; case ISD::ANY_EXTEND: @@ -6317,8 +6414,30 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) SubVec = GetWidenedVector(SubVec); - if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() && - N->getConstantOperandVal(2) == 0) + EVT SubVT = SubVec.getValueType(); + + // Whether or not all the elements of the widened SubVec will be inserted into + // valid indices of VT. + bool IndicesValid = false; + // If we statically know that VT can fit SubVT, the indices are valid. + if (VT.knownBitsGE(SubVT)) + IndicesValid = true; + else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) { + // Otherwise, if we're inserting a fixed vector into a scalable vector and + // we know the minimum vscale we can work out if it's valid ourselves. + Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute( + Attribute::VScaleRange); + if (Attr.isValid()) { + unsigned VScaleMin = Attr.getVScaleRangeMin(); + if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >= + SubVT.getFixedSizeInBits()) + IndicesValid = true; + } + } + + // We need to make sure that the indices are still valid, otherwise we might + // widen what was previously well-defined to something undefined. + if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0) return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec, N->getOperand(2)); @@ -6338,6 +6457,11 @@ SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { N->getValueType(0), InOp, N->getOperand(1)); } +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), InOp); +} + SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { // We have to widen the value, but we want only to store the original // vector type. 
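A worked instance of the IndicesValid computation in WidenVecOp_INSERT_SUBVECTOR above: inserting a fixed v4i32 subvector (128 bits) into a scalable nxv2i32 value, whose known-minimum size is 64 bits. With vscale_range(2, ...) on the function, 64 * 2 = 128 >= 128, so an insert at index 0 is known to stay inside the scalable register and the widened INSERT_SUBVECTOR remains well-defined; if no such bound can be proven, IndicesValid stays false and the fold is skipped.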
@@ -6458,7 +6582,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N, } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { - assert((OpNo == 1 || OpNo == 3) && + assert((OpNo == 1 || OpNo == 4) && "Can widen only data or mask operand of mstore"); MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); SDValue Mask = MST->getMask(); @@ -7083,7 +7207,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = - DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Offset)); + DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::getFixed(Offset)); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 5b01743d23e0..ab4c33c9e976 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -498,12 +498,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const InlineAsm::Flag F(Flags); + unsigned NumVals = F.getNumOperandRegisters(); ++i; // Skip the ID value. - if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags) || - InlineAsm::isClobberKind(Flags)) { + if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || + F.isClobberKind()) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -808,12 +808,12 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Public Constructor Functions //===----------------------------------------------------------------------===// -llvm::ScheduleDAGSDNodes * -llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::ScheduleDAGSDNodes *llvm::createFastDAGScheduler(SelectionDAGISel *IS, + CodeGenOptLevel) { return new ScheduleDAGFast(*IS->MF); } -llvm::ScheduleDAGSDNodes * -llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { +llvm::ScheduleDAGSDNodes *llvm::createDAGLinearizer(SelectionDAGISel *IS, + CodeGenOptLevel) { return new ScheduleDAGLinearize(*IS->MF); } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 458f50c54824..47c137d2bcad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -188,10 +188,9 @@ private: public: ScheduleDAGRRList(MachineFunction &mf, bool needlatency, SchedulingPriorityQueue *availqueue, - CodeGenOpt::Level OptLevel) - : ScheduleDAGSDNodes(mf), - NeedLatency(needlatency), AvailableQueue(availqueue), - Topo(SUnits, nullptr) { + CodeGenOptLevel OptLevel) + : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), + AvailableQueue(availqueue), Topo(SUnits, nullptr) { const TargetSubtargetInfo &STI = mf.getSubtarget(); if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); @@ -987,11 +986,6 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) return nullptr; - // unfolding an x86 DEC64m operation results in store, dec, load which - // can't be handled here so quit - if (NewNodes.size() == 3) - return nullptr; - assert(NewNodes.size() == 2 && "Expected a load folding node!"); N = NewNodes[1]; @@ -1377,12 +1371,12 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); - unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + const InlineAsm::Flag F(Flags); + unsigned NumVals = F.getNumOperandRegisters(); ++i; // Skip the ID value. - if (InlineAsm::isRegDefKind(Flags) || - InlineAsm::isRegDefEarlyClobberKind(Flags) || - InlineAsm::isClobberKind(Flags)) { + if (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || + F.isClobberKind()) { // Check for def of register or earlyclobber register. 
for (; NumVals; --NumVals, ++i) { Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); @@ -3150,9 +3144,8 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { // Public Constructor Functions //===----------------------------------------------------------------------===// -ScheduleDAGSDNodes * -llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { +ScheduleDAGSDNodes *llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOptLevel OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); const TargetInstrInfo *TII = STI.getInstrInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); @@ -3166,7 +3159,7 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { + CodeGenOptLevel OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); const TargetInstrInfo *TII = STI.getInstrInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); @@ -3180,7 +3173,7 @@ llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, ScheduleDAGSDNodes * llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { + CodeGenOptLevel OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); const TargetInstrInfo *TII = STI.getInstrInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); @@ -3194,9 +3187,8 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, return SD; } -ScheduleDAGSDNodes * -llvm::createILPListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { +ScheduleDAGSDNodes *llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOptLevel OptLevel) { const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); const TargetInstrInfo *TII = STI.getInstrInfo(); const TargetRegisterInfo *TRI = STI.getRegisterInfo(); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 0579c1664d5c..c9e2745f00c9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -659,18 +659,19 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, if (Use->isMachineOpcode()) // Adjust the use operand index by num of defs. OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); - int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); - if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && + std::optional<unsigned> Latency = + TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); + if (Latency > 1U && Use->getOpcode() == ISD::CopyToReg && !BB->succ_empty()) { unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); if (Register::isVirtualRegister(Reg)) // This copy is a liveout value. It is likely coalesced, so reduce the // latency so not to penalize the def. // FIXME: need target specific adjustment here? 
- Latency = Latency - 1; + Latency = *Latency - 1; } - if (Latency >= 0) - dep.setLatency(Latency); + if (Latency) + dep.setLatency(*Latency); } void ScheduleDAGSDNodes::dumpNode(const SUnit &SU) const { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 1ba1fd65b8c9..ae42a870ea2f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -265,7 +265,7 @@ void ScheduleDAGVLIW::listScheduleTopDown() { //===----------------------------------------------------------------------===// /// createVLIWDAGScheduler - This creates a top-down list scheduler. -ScheduleDAGSDNodes * -llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { +ScheduleDAGSDNodes *llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, + CodeGenOptLevel) { return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 5c1b19eba1c1..5be1892a44f6 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/BinaryFormat/Dwarf.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -161,8 +162,13 @@ bool ISD::isConstantSplatVector(const SDNode *N, APInt &SplatVal) { unsigned SplatBitSize; bool HasUndefs; unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + // Endianness does not matter here. We are checking for a splat given the + // element size of the vector, and if we find such a splat for little endian + // layout, then that should be valid also for big endian (as the full vector + // size is known to be a multiple of the element size). + const bool IsBigEndian = false; return BV->isConstantSplat(SplatVal, SplatUndef, SplatBitSize, HasUndefs, - EltSize) && + EltSize, IsBigEndian) && EltSize == SplatBitSize; } @@ -344,12 +350,13 @@ bool ISD::isFreezeUndef(const SDNode *N) { return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef(); } -bool ISD::matchUnaryPredicate(SDValue Op, - std::function<bool(ConstantSDNode *)> Match, - bool AllowUndefs) { +template <typename ConstNodeType> +bool ISD::matchUnaryPredicateImpl(SDValue Op, + std::function<bool(ConstNodeType *)> Match, + bool AllowUndefs) { // FIXME: Add support for scalar UNDEF cases? - if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) - return Match(Cst); + if (auto *C = dyn_cast<ConstNodeType>(Op)) + return Match(C); // FIXME: Add support for vector UNDEF cases? if (ISD::BUILD_VECTOR != Op.getOpcode() && @@ -364,12 +371,17 @@ bool ISD::matchUnaryPredicate(SDValue Op, continue; } - auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(i)); + auto *Cst = dyn_cast<ConstNodeType>(Op.getOperand(i)); if (!Cst || Cst->getValueType(0) != SVT || !Match(Cst)) return false; } return true; } +// Build used template types. 
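
Aside on the latency change above: `getOperandLatency` now returns `std::optional<unsigned>`, and the comparison `Latency > 1U` is well defined because C++17's mixed comparison operators treat an empty optional as less than any value. A minimal standalone sketch of that behavior (plain C++17, not LLVM code):

  #include <cassert>
  #include <optional>

  int main() {
    std::optional<unsigned> Latency;   // no latency information available
    assert(!(Latency > 1U));           // empty optional compares less than any value
    Latency = 3;                       // engaged optional compares its contained value
    assert(Latency > 1U);
    if (Latency)                       // update only when a latency was computed
      assert(*Latency - 1 == 2);
    return 0;
  }
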
+template bool ISD::matchUnaryPredicateImpl<ConstantSDNode>( + SDValue, std::function<bool(ConstantSDNode *)>, bool); +template bool ISD::matchUnaryPredicateImpl<ConstantFPSDNode>( + SDValue, std::function<bool(ConstantFPSDNode *)>, bool); bool ISD::matchBinaryPredicate( SDValue LHS, SDValue RHS, @@ -951,7 +963,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { /// doNotCSE - Return true if CSE should not be performed for this node. static bool doNotCSE(SDNode *N) { if (N->getValueType(0) == MVT::Glue) - return true; // Never CSE anything that produces a flag. + return true; // Never CSE anything that produces a glue result. switch (N->getOpcode()) { default: break; @@ -963,7 +975,7 @@ static bool doNotCSE(SDNode *N) { // Check that remaining values produced are not flags. for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) if (N->getValueType(i) == MVT::Glue) - return true; // Never CSE anything that produces a flag. + return true; // Never CSE anything that produces a glue result. return false; } @@ -1197,7 +1209,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { } #ifndef NDEBUG // Verify that the node was actually in one of the CSE maps, unless it has a - // flag result (which cannot be CSE'd) or is one of the special cases that are + // glue result (which cannot be CSE'd) or is one of the special cases that are // not subject to CSE. if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && !N->isMachineOpcode() && !doNotCSE(N)) { @@ -1296,17 +1308,16 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, } Align SelectionDAG::getEVTAlign(EVT VT) const { - Type *Ty = VT == MVT::iPTR ? - PointerType::get(Type::getInt8Ty(*getContext()), 0) : - VT.getTypeForEVT(*getContext()); + Type *Ty = VT == MVT::iPTR ? PointerType::get(*getContext(), 0) + : VT.getTypeForEVT(*getContext()); return getDataLayout().getABITypeAlign(Ty); } // EntryNode could meaningfully have debug info if we can find it... 
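
The two explicit instantiations of `matchUnaryPredicateImpl` above let the template's definition stay in the .cpp file while remaining usable with both constant node types. A toy illustration of the same explicit-instantiation pattern, using a hypothetical `isAllOf` helper rather than any LLVM API:

  // header: only the declaration is visible to clients.
  template <typename T> bool isAllOf(const T *Vals, int N, T Match);

  // source file: the definition stays out of line...
  template <typename T> bool isAllOf(const T *Vals, int N, T Match) {
    for (int I = 0; I < N; ++I)
      if (Vals[I] != Match)
        return false;
    return true;
  }

  // ...and these lines emit the only two instantiations the linker will see,
  // mirroring the ConstantSDNode/ConstantFPSDNode pair above.
  template bool isAllOf<int>(const int *, int, int);
  template bool isAllOf<double>(const double *, int, double);
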
-SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), OptLevel(OL), - EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)), +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOptLevel OL) + : TM(tm), OptLevel(OL), EntryNode(ISD::EntryToken, 0, DebugLoc(), + getVTList(MVT::Other, MVT::Glue)), Root(getEntryNode()) { InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); @@ -1454,6 +1465,51 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) { getNode(ISD::TRUNCATE, DL, VT, Op); } +SDValue SelectionDAG::getBitcastedAnyExtOrTrunc(SDValue Op, const SDLoc &DL, + EVT VT) { + assert(!VT.isVector()); + auto Type = Op.getValueType(); + SDValue DestOp; + if (Type == VT) + return Op; + auto Size = Op.getValueSizeInBits(); + DestOp = getBitcast(MVT::getIntegerVT(Size), Op); + if (DestOp.getValueType() == VT) + return DestOp; + + return getAnyExtOrTrunc(DestOp, DL, VT); +} + +SDValue SelectionDAG::getBitcastedSExtOrTrunc(SDValue Op, const SDLoc &DL, + EVT VT) { + assert(!VT.isVector()); + auto Type = Op.getValueType(); + SDValue DestOp; + if (Type == VT) + return Op; + auto Size = Op.getValueSizeInBits(); + DestOp = getBitcast(MVT::getIntegerVT(Size), Op); + if (DestOp.getValueType() == VT) + return DestOp; + + return getSExtOrTrunc(DestOp, DL, VT); +} + +SDValue SelectionDAG::getBitcastedZExtOrTrunc(SDValue Op, const SDLoc &DL, + EVT VT) { + assert(!VT.isVector()); + auto Type = Op.getValueType(); + SDValue DestOp; + if (Type == VT) + return Op; + auto Size = Op.getValueSizeInBits(); + DestOp = getBitcast(MVT::getIntegerVT(Size), Op); + if (DestOp.getValueType() == VT) + return DestOp; + + return getZExtOrTrunc(DestOp, DL, VT); +} + SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT) { if (VT.bitsLE(Op.getValueType())) @@ -1570,7 +1626,11 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); - APInt NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); + APInt NewVal; + if (TLI->isSExtCheaperThanZExt(VT.getScalarType(), EltVT)) + NewVal = Elt->getValue().sextOrTrunc(EltVT.getSizeInBits()); + else + NewVal = Elt->getValue().zextOrTrunc(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } // In other cases the element type is illegal and needs to be expanded, for @@ -1587,7 +1647,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); // For scalable vectors, try to use a SPLAT_VECTOR_PARTS node. 
- if (VT.isScalableVector()) { + if (VT.isScalableVector() || + TLI->isOperationLegal(ISD::SPLAT_VECTOR, VT)) { assert(EltVT.getSizeInBits() % ViaEltSizeInBits == 0 && "Can only handle an even split!"); unsigned Parts = EltVT.getSizeInBits() / ViaEltSizeInBits; @@ -1801,6 +1862,13 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, return SDValue(N, 0); } +SDValue SelectionDAG::getJumpTableDebugInfo(int JTI, SDValue Chain, + const SDLoc &DL) { + EVT PTy = getTargetLoweringInfo().getPointerTy(getDataLayout()); + return getNode(ISD::JUMP_TABLE_DEBUG_INFO, DL, MVT::Glue, Chain, + getTargetConstant(static_cast<uint64_t>(JTI), DL, PTy, true)); +} + SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, MaybeAlign Alignment, int Offset, bool isTarget, unsigned TargetFlags) { @@ -1855,23 +1923,6 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, - unsigned TargetFlags) { - FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt); - ID.AddInteger(Index); - ID.AddInteger(Offset); - ID.AddInteger(TargetFlags); - void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, IP)) - return SDValue(E, 0); - - auto *N = newSDNode<TargetIndexSDNode>(Index, VT, Offset, TargetFlags); - CSEMap.InsertNode(N, IP); - InsertNode(N); - return SDValue(N, 0); -} - SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt); @@ -1945,15 +1996,15 @@ SDValue SelectionDAG::getVScale(const SDLoc &DL, EVT VT, APInt MulImm, assert(MulImm.getBitWidth() == VT.getSizeInBits() && "APInt size does not match type size!"); + if (MulImm == 0) + return getConstant(0, DL, VT); + if (ConstantFold) { const MachineFunction &MF = getMachineFunction(); - auto Attr = MF.getFunction().getFnAttribute(Attribute::VScaleRange); - if (Attr.isValid()) { - unsigned VScaleMin = Attr.getVScaleRangeMin(); - if (std::optional<unsigned> VScaleMax = Attr.getVScaleRangeMax()) - if (*VScaleMax == VScaleMin) - return getConstant(MulImm * VScaleMin, DL, VT); - } + const Function &F = MF.getFunction(); + ConstantRange CR = getVScaleRange(&F, 64); + if (const APInt *C = CR.getSingleElement()) + return getConstant(MulImm * C->getZExtValue(), DL, VT); } return getNode(ISD::VSCALE, DL, VT, getConstant(MulImm, DL, VT)); @@ -2118,11 +2169,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, if (Splat && UndefElements.none()) { // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the // number of elements match or the value splatted is a zero constant. - if (SameNumElts) + if (SameNumElts || isNullConstant(Splat)) return N1; - if (auto *C = dyn_cast<ConstantSDNode>(Splat)) - if (C->isZero()) - return N1; } // If the shuffle itself creates a splat, build the vector directly. @@ -2487,7 +2535,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, // icmp X, X -> true/false // icmp X, undef -> true/false because undef could be X. - if (N1 == N2) + if (N1.isUndef() || N2.isUndef() || N1 == N2) return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT); } @@ -2833,6 +2881,12 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, } } + // Fallback - this is a splat if all demanded elts are the same constant. 
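
The rewritten `getVScale` fold above asks `getVScaleRange` for the function's `vscale_range` attribute and folds only when the resulting range collapses to a single element. A standalone sketch of that decision, with a hypothetical stand-in type instead of LLVM's ConstantRange:

  #include <cassert>
  #include <cstdint>
  #include <optional>

  // Hypothetical stand-in for the vscale_range(min, max) attribute.
  struct VScaleRange {
    uint64_t Min, Max;
    // Mirrors ConstantRange::getSingleElement(): a value only when Min == Max.
    std::optional<uint64_t> getSingleElement() const {
      return Min == Max ? std::optional<uint64_t>(Min) : std::nullopt;
    }
  };

  int main() {
    // vscale_range(2,2): vscale is known to be exactly 2, so an
    // ISD::VSCALE node scaled by 8 folds to the constant 16.
    VScaleRange Known{2, 2};
    uint64_t MulImm = 8;
    if (auto C = Known.getSingleElement())
      assert(MulImm * *C == 16);

    // vscale_range(1,16): no single element, so the VSCALE node survives.
    VScaleRange Unknown{1, 16};
    assert(!Unknown.getSingleElement());
    return 0;
  }
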
+ if (computeKnownBits(V, DemandedElts, Depth).isConstant()) { + UndefElts = ~DemandedElts; + return true; + } + return false; } @@ -3054,6 +3108,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth); break; } + case ISD::SPLAT_VECTOR_PARTS: { + unsigned ScalarSize = Op.getOperand(0).getScalarValueSizeInBits(); + assert(ScalarSize * Op.getNumOperands() == BitWidth && + "Expected SPLAT_VECTOR_PARTS scalars to cover element width"); + for (auto [I, SrcOp] : enumerate(Op->ops())) { + Known.insertBits(computeKnownBits(SrcOp, Depth + 1), ScalarSize * I); + } + break; + } case ISD::BUILD_VECTOR: assert(!Op.getValueType().isScalableVector()); // Collect the known bits that are shared by every demanded vector element. @@ -3685,14 +3748,19 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert(Op.getResNo() == 0 && "We only compute knownbits for the difference here."); - // TODO: Compute influence of the carry operand. - if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) - break; + // With USUBO_CARRY and SSUBO_CARRY a borrow bit may be added in. + KnownBits Borrow(1); + if (Opcode == ISD::USUBO_CARRY || Opcode == ISD::SSUBO_CARRY) { + Borrow = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + // Borrow has bit width 1 + Borrow = Borrow.trunc(1); + } else { + Borrow.setAllZero(); + } Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false, - Known, Known2); + Known = KnownBits::computeForSubBorrow(Known, Known2, Borrow); break; } case ISD::UADDO: @@ -3717,15 +3785,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (Opcode == ISD::ADDE) // Can't track carry from glue, set carry to unknown. Carry.resetAll(); - else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) - // TODO: Compute known bits for the carry operand. Not sure if it is worth - // the trouble (how often will we find a known carry bit). And I haven't - // tested this very much yet, but something like this might work: - // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); - // Carry = Carry.zextOrTrunc(1, false); - Carry.resetAll(); - else + else if (Opcode == ISD::UADDO_CARRY || Opcode == ISD::SADDO_CARRY) { + Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + // Carry has bit width 1 + Carry = Carry.trunc(1); + } else { Carry.setAllZero(); + } Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -4044,8 +4110,11 @@ SelectionDAG::computeOverflowForSignedSub(SDValue N0, SDValue N1) const { if (ComputeNumSignBits(N0) > 1 && ComputeNumSignBits(N1) > 1) return OFK_Never; - // TODO: Add ConstantRange::signedSubMayOverflow handling. 
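
The new USUBO_CARRY/SSUBO_CARRY handling above feeds a 1-bit borrow into `KnownBits::computeForSubBorrow`. The soundness criterion for any such transfer function can be checked by enumeration: every concrete value consistent with the inputs' known bits must yield a result consistent with the output's known bits. A brute-force sketch at 4 bits (illustrative only, not the LLVM KnownBits implementation):

  #include <cassert>
  #include <cstdint>

  // Minimal known-bits pair: a bit is "known" when set in exactly one mask.
  struct Known4 { uint8_t Zero, One; }; // 4-bit lanes, bits above 0xF unused

  int main() {
    // LHS is 0b1?00 (bit 1 unknown), RHS is exactly 0b0011, borrow-in is 0.
    Known4 L{0b0101, 0b1000}, R{0b1100, 0b0011};
    uint8_t Borrow = 0;

    // Enumerate every value consistent with the known bits and intersect the
    // results; whatever bits agree everywhere are knowable for the SUB.
    uint8_t CommonZero = 0xF, CommonOne = 0xF;
    for (uint8_t A = 0; A < 16; ++A) {
      if ((A & L.Zero) || ((~A & 0xF) & L.One)) continue;
      for (uint8_t B = 0; B < 16; ++B) {
        if ((B & R.Zero) || ((~B & 0xF) & R.One)) continue;
        uint8_t D = (A - B - Borrow) & 0xF;
        CommonOne &= D;
        CommonZero &= ~D & 0xF;
      }
    }
    // 0b1000-0b0011=0b0101 and 0b1010-0b0011=0b0111: bits 0 and 2 are known
    // one, bit 3 is known zero, and bit 1 stays unknown.
    assert(CommonOne == 0b0101 && CommonZero == 0b1000);
    return 0;
  }
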
- return OFK_Sometime; + KnownBits N0Known = computeKnownBits(N0); + KnownBits N1Known = computeKnownBits(N1); + ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, true); + ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, true); + return mapOverflowResult(N0Range.signedSubMayOverflow(N1Range)); } SelectionDAG::OverflowKind @@ -4054,7 +4123,53 @@ SelectionDAG::computeOverflowForUnsignedSub(SDValue N0, SDValue N1) const { if (isNullConstant(N1)) return OFK_Never; - // TODO: Add ConstantRange::unsignedSubMayOverflow handling. + KnownBits N0Known = computeKnownBits(N0); + KnownBits N1Known = computeKnownBits(N1); + ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false); + ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false); + return mapOverflowResult(N0Range.unsignedSubMayOverflow(N1Range)); +} + +SelectionDAG::OverflowKind +SelectionDAG::computeOverflowForUnsignedMul(SDValue N0, SDValue N1) const { + // X * 0 and X * 1 never overflow. + if (isNullConstant(N1) || isOneConstant(N1)) + return OFK_Never; + + KnownBits N0Known = computeKnownBits(N0); + KnownBits N1Known = computeKnownBits(N1); + ConstantRange N0Range = ConstantRange::fromKnownBits(N0Known, false); + ConstantRange N1Range = ConstantRange::fromKnownBits(N1Known, false); + return mapOverflowResult(N0Range.unsignedMulMayOverflow(N1Range)); +} + +SelectionDAG::OverflowKind +SelectionDAG::computeOverflowForSignedMul(SDValue N0, SDValue N1) const { + // X * 0 and X * 1 never overflow. + if (isNullConstant(N1) || isOneConstant(N1)) + return OFK_Never; + + // Get the size of the result. + unsigned BitWidth = N0.getScalarValueSizeInBits(); + + // Sum of the sign bits. + unsigned SignBits = ComputeNumSignBits(N0) + ComputeNumSignBits(N1); + + // If we have enough sign bits, then there's no overflow. + if (SignBits > BitWidth + 1) + return OFK_Never; + + if (SignBits == BitWidth + 1) { + // The overflow occurs when the true multiplication of the + // operands is the minimum negative number. + KnownBits N0Known = computeKnownBits(N0); + KnownBits N1Known = computeKnownBits(N1); + // If one of the operands is non-negative, then there's no + // overflow. + if (N0Known.isNonNegative() || N1Known.isNonNegative()) + return OFK_Never; + } + return OFK_Sometime; } @@ -4066,8 +4181,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const { unsigned BitWidth = OpVT.getScalarSizeInBits(); // Is the constant a known power of 2? - if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val)) - return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); + if (ISD::matchUnaryPredicate(Val, [BitWidth](ConstantSDNode *C) { + return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); + })) + return true; // A left-shift of a constant one will have exactly one bit set because // shifting the bit off the end is undefined.
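
The sign-bit argument in `computeOverflowForSignedMul` above is the interesting case: a value with S sign bits (as counted by ComputeNumSignBits, which includes the sign bit itself) fits in BitWidth - S + 1 significant bits, so when the operands together carry more than BitWidth + 1 sign bits the exact product fits back into BitWidth bits. An exhaustive i8 check of that rule (a standalone sketch, not LLVM code):

  #include <cassert>
  #include <cstdint>

  // Number of redundant sign bits in an 8-bit value (always >= 1).
  static int numSignBits(int8_t V) {
    int N = 1;
    for (int B = 6; B >= 0; --B, ++N)
      if (((V >> B) & 1) != ((V >> 7) & 1))
        break;
    return N;
  }

  int main() {
    // When the operands together carry more than BitWidth + 1 = 9 sign bits,
    // i8 multiplication can never overflow.
    for (int A = -128; A < 128; ++A)
      for (int B = -128; B < 128; ++B)
        if (numSignBits((int8_t)A) + numSignBits((int8_t)B) > 9) {
          int P = A * B;
          assert(P >= -128 && P <= 127 && "rule would have been wrong");
        }
    return 0;
  }
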
@@ -4075,6 +4192,8 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const { auto *C = isConstOrConstSplat(Val.getOperand(0)); if (C && C->getAPIntValue() == 1) return true; + return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) && + isKnownNeverZero(Val, Depth); } // Similarly, a logical right-shift of a constant sign-bit will have exactly @@ -4083,8 +4202,13 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const { auto *C = isConstOrConstSplat(Val.getOperand(0)); if (C && C->getAPIntValue().isSignMask()) return true; + return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1) && + isKnownNeverZero(Val, Depth); } + if (Val.getOpcode() == ISD::ROTL || Val.getOpcode() == ISD::ROTR) + return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); + // Are all operands of a build vector constant powers of two? if (Val.getOpcode() == ISD::BUILD_VECTOR) if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) { @@ -4106,6 +4230,34 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val, unsigned Depth) const { isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1)) return true; + if (Val.getOpcode() == ISD::SMIN || Val.getOpcode() == ISD::SMAX || + Val.getOpcode() == ISD::UMIN || Val.getOpcode() == ISD::UMAX) + return isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1) && + isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); + + if (Val.getOpcode() == ISD::SELECT || Val.getOpcode() == ISD::VSELECT) + return isKnownToBeAPowerOfTwo(Val.getOperand(2), Depth + 1) && + isKnownToBeAPowerOfTwo(Val.getOperand(1), Depth + 1); + + if (Val.getOpcode() == ISD::AND) { + // Looking for `x & -x` pattern: + // If x == 0: + // x & -x -> 0 + // If x != 0: + // x & -x -> non-zero pow2 + // so if we find the pattern return whether we know `x` is non-zero. + for (unsigned OpIdx = 0; OpIdx < 2; ++OpIdx) { + SDValue NegOp = Val.getOperand(OpIdx); + if (NegOp.getOpcode() == ISD::SUB && + NegOp.getOperand(1) == Val.getOperand(1 - OpIdx) && + isNullOrNullSplat(NegOp.getOperand(0))) + return isKnownNeverZero(Val.getOperand(1 - OpIdx), Depth); + } + } + + if (Val.getOpcode() == ISD::ZERO_EXTEND) + return isKnownToBeAPowerOfTwo(Val.getOperand(0), Depth + 1); + // More could be done here, though the above checks are enough // to handle some common cases. return false; @@ -4866,8 +5018,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, unsigned Opcode = Op.getOpcode(); switch (Opcode) { - case ISD::AssertSext: - case ISD::AssertZext: case ISD::FREEZE: case ISD::CONCAT_VECTORS: case ISD::INSERT_SUBVECTOR: @@ -4883,7 +5033,6 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::BITREVERSE: case ISD::PARITY: case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: case ISD::TRUNCATE: case ISD::SIGN_EXTEND_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: @@ -4893,6 +5042,10 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::BUILD_PAIR: return false; + // Matches hasPoisonGeneratingFlags(). 
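
Among the new power-of-two cases above, the `x & -x` match is the least obvious: two's complement negation flips every bit above the lowest set bit and preserves that bit, so the AND isolates it. Exhaustive 8-bit confirmation (standalone sketch):

  #include <cassert>
  #include <cstdint>

  static bool isPowerOfTwo(uint8_t V) { return V && !(V & (V - 1)); }

  int main() {
    // The matched pattern: x & (0 - x) isolates the lowest set bit, so it is
    // a power of two exactly when x is non-zero.
    for (unsigned X = 0; X < 256; ++X) {
      uint8_t V = (uint8_t)X & (uint8_t)(0 - X);
      if (X == 0)
        assert(V == 0);          // x == 0: the AND folds to 0
      else
        assert(isPowerOfTwo(V)); // x != 0: always exactly one set bit
    }
    return 0;
  }
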
+ case ISD::ZERO_EXTEND: + return ConsiderFlags && Op->getFlags().hasNonNeg(); + case ISD::ADD: case ISD::SUB: case ISD::MUL: @@ -4929,6 +5082,15 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, return true; } +bool SelectionDAG::isADDLike(SDValue Op) const { + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::OR) + return haveNoCommonBitsSet(Op.getOperand(0), Op.getOperand(1)); + if (Opcode == ISD::XOR) + return isMinSignedConstant(Op.getOperand(1)); + return false; +} + bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || !isa<ConstantSDNode>(Op.getOperand(1))) @@ -4974,12 +5136,15 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FCANONICALIZE: case ISD::FEXP: case ISD::FEXP2: + case ISD::FEXP10: case ISD::FTRUNC: case ISD::FFLOOR: case ISD::FCEIL: case ISD::FROUND: case ISD::FROUNDEVEN: case ISD::FRINT: + case ISD::LRINT: + case ISD::LLRINT: case ISD::FNEARBYINT: case ISD::FLDEXP: { if (SNaN) @@ -5109,21 +5274,29 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const { return isKnownNeverZero(Op.getOperand(1), Depth + 1) && isKnownNeverZero(Op.getOperand(2), Depth + 1); - case ISD::SHL: + case ISD::SHL: { if (Op->getFlags().hasNoSignedWrap() || Op->getFlags().hasNoUnsignedWrap()) return isKnownNeverZero(Op.getOperand(0), Depth + 1); - - // 1 << X is never zero. TODO: This can be expanded if we can bound X. - // The expression is really !Known.One[BitWidth-MaxLog2(Known):0].isZero() - if (computeKnownBits(Op.getOperand(0), Depth + 1).One[0]) + KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1); + // 1 << X is never zero. + if (ValKnown.One[0]) + return true; + // If max shift cnt of known ones is non-zero, result is non-zero. + APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue(); + if (MaxCnt.ult(ValKnown.getBitWidth()) && + !ValKnown.One.shl(MaxCnt).isZero()) return true; break; - + } case ISD::UADDSAT: case ISD::UMAX: return isKnownNeverZero(Op.getOperand(1), Depth + 1) || isKnownNeverZero(Op.getOperand(0), Depth + 1); + // TODO for smin/smax: If either operand is known negative/positive + // respectively we don't need the other to be known at all. + case ISD::SMAX: + case ISD::SMIN: case ISD::UMIN: return isKnownNeverZero(Op.getOperand(1), Depth + 1) && isKnownNeverZero(Op.getOperand(0), Depth + 1); @@ -5137,16 +5310,19 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const { return isKnownNeverZero(Op.getOperand(0), Depth + 1); case ISD::SRA: - case ISD::SRL: + case ISD::SRL: { if (Op->getFlags().hasExact()) return isKnownNeverZero(Op.getOperand(0), Depth + 1); - // Signed >> X is never zero. TODO: This can be expanded if we can bound X. - // The expression is really - // !Known.One[SignBit:SignBit-(BitWidth-MaxLog2(Known))].isZero() - if (computeKnownBits(Op.getOperand(0), Depth + 1).isNegative()) + KnownBits ValKnown = computeKnownBits(Op.getOperand(0), Depth + 1); + if (ValKnown.isNegative()) + return true; + // If max shift cnt of known ones is non-zero, result is non-zero. + APInt MaxCnt = computeKnownBits(Op.getOperand(1), Depth + 1).getMaxValue(); + if (MaxCnt.ult(ValKnown.getBitWidth()) && + !ValKnown.One.lshr(MaxCnt).isZero()) return true; break; - + } case ISD::UDIV: case ISD::SDIV: // div exact can only produce a zero if the dividend is zero. 
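
The new `isADDLike` helper above captures the two standard carry-free disguises of addition: OR when the operands share no set bits, and XOR with the minimum signed constant, which only toggles the sign bit. Both identities brute-force cleanly at 8 bits (standalone sketch):

  #include <cassert>

  int main() {
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned Y = 0; Y < 256; ++Y) {
        // OR is an ADD when the operands share no set bits (no carries).
        if ((X & Y) == 0)
          assert(((X | Y) & 0xFF) == ((X + Y) & 0xFF));
        // XOR with the minimum signed value (0x80 for i8) flips the top bit,
        // which can never generate a carry, so it is an ADD as well.
        assert(((X ^ 0x80u) & 0xFF) == ((X + 0x80u) & 0xFF));
      }
    return 0;
  }
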
@@ -5422,161 +5598,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDValue N1, const SDNodeFlags Flags) { assert(N1.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); - // Constant fold unary operations with an integer constant operand. Even - // opaque constant will be folded, because the folding of unary operations - // doesn't create new constants with different values. Nevertheless, the - // opaque flag is preserved during folding to prevent future folding with - // other constants. - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { - const APInt &Val = C->getAPIntValue(); - switch (Opcode) { - default: break; - case ISD::SIGN_EXTEND: - return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, - C->isTargetOpcode(), C->isOpaque()); - case ISD::TRUNCATE: - if (C->isOpaque()) - break; - [[fallthrough]]; - case ISD::ZERO_EXTEND: - return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, - C->isTargetOpcode(), C->isOpaque()); - case ISD::ANY_EXTEND: - // Some targets like RISCV prefer to sign extend some types. - if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT)) - return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, - C->isTargetOpcode(), C->isOpaque()); - return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, - C->isTargetOpcode(), C->isOpaque()); - case ISD::UINT_TO_FP: - case ISD::SINT_TO_FP: { - APFloat apf(EVTToAPFloatSemantics(VT), - APInt::getZero(VT.getSizeInBits())); - (void)apf.convertFromAPInt(Val, - Opcode==ISD::SINT_TO_FP, - APFloat::rmNearestTiesToEven); - return getConstantFP(apf, DL, VT); - } - case ISD::BITCAST: - if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) - return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT); - if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT); - if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT); - if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) - return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); - break; - case ISD::ABS: - return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), - C->isOpaque()); - case ISD::BITREVERSE: - return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(), - C->isOpaque()); - case ISD::BSWAP: - return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), - C->isOpaque()); - case ISD::CTPOP: - return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(), - C->isOpaque()); - case ISD::CTLZ: - case ISD::CTLZ_ZERO_UNDEF: - return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(), - C->isOpaque()); - case ISD::CTTZ: - case ISD::CTTZ_ZERO_UNDEF: - return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(), - C->isOpaque()); - case ISD::FP16_TO_FP: - case ISD::BF16_TO_FP: { - bool Ignored; - APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf() - : APFloat::BFloat(), - (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); - - // This can return overflow, underflow, or inexact; we don't care. - // FIXME need to be more flexible about rounding mode. 
- (void)FPV.convert(EVTToAPFloatSemantics(VT), - APFloat::rmNearestTiesToEven, &Ignored); - return getConstantFP(FPV, DL, VT); - } - case ISD::STEP_VECTOR: { - if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this)) - return V; - break; - } - } - } - - // Constant fold unary operations with a floating point constant operand. - if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N1)) { - APFloat V = C->getValueAPF(); // make copy - switch (Opcode) { - case ISD::FNEG: - V.changeSign(); - return getConstantFP(V, DL, VT); - case ISD::FABS: - V.clearSign(); - return getConstantFP(V, DL, VT); - case ISD::FCEIL: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, DL, VT); - break; - } - case ISD::FTRUNC: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, DL, VT); - break; - } - case ISD::FFLOOR: { - APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); - if (fs == APFloat::opOK || fs == APFloat::opInexact) - return getConstantFP(V, DL, VT); - break; - } - case ISD::FP_EXTEND: { - bool ignored; - // This can return overflow, underflow, or inexact; we don't care. - // FIXME need to be more flexible about rounding mode. - (void)V.convert(EVTToAPFloatSemantics(VT), - APFloat::rmNearestTiesToEven, &ignored); - return getConstantFP(V, DL, VT); - } - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: { - bool ignored; - APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT); - // FIXME need to be more flexible about rounding mode. - APFloat::opStatus s = - V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored); - if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual - break; - return getConstant(IntVal, DL, VT); - } - case ISD::BITCAST: - if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) - return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); - if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16) - return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); - if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) - return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT); - if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) - return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); - break; - case ISD::FP_TO_FP16: - case ISD::FP_TO_BF16: { - bool Ignored; - // This can return overflow, underflow, or inexact; we don't care. - // FIXME need to be more flexible about rounding mode. - (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf() - : APFloat::BFloat(), - APFloat::rmNearestTiesToEven, &Ignored); - return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); - } - } - } // Constant fold unary operations with a vector integer or float operand. 
switch (Opcode) { @@ -5592,12 +5613,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::FP_TO_FP16: + case ISD::FP_TO_BF16: case ISD::TRUNCATE: case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: + case ISD::FP16_TO_FP: + case ISD::BF16_TO_FP: + case ISD::BITCAST: case ISD::ABS: case ISD::BITREVERSE: case ISD::BSWAP: @@ -5605,7 +5631,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTLZ_ZERO_UNDEF: case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - case ISD::CTPOP: { + case ISD::CTPOP: + case ISD::STEP_VECTOR: { SDValue Ops = {N1}; if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops)) return Fold; @@ -5694,6 +5721,24 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::UNDEF) // zext(undef) = 0, because the top bits will be zero. return getConstant(0, DL, VT); + + // Skip unnecessary zext_inreg pattern: + // (zext (trunc x)) -> x iff the upper bits are known zero. + // TODO: Remove (zext (trunc (and x, c))) exception which some targets + // use to recognise zext_inreg patterns. + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = N1.getOperand(0); + if (OpOp.getValueType() == VT) { + if (OpOp.getOpcode() != ISD::AND) { + APInt HiBits = APInt::getBitsSetFrom(VT.getScalarSizeInBits(), + N1.getScalarValueSizeInBits()); + if (MaskedValueIsZero(OpOp, HiBits)) { + transferDbgValues(N1, OpOp); + return OpOp; + } + } + } + } break; case ISD::ANY_EXTEND: assert(VT.isInteger() && N1.getValueType().isInteger() && @@ -5850,7 +5895,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *N; SDVTList VTs = getVTList(VT); SDValue Ops[] = {N1}; - if (VT != MVT::Glue) { // Don't CSE flag producing nodes + if (VT != MVT::Glue) { // Don't CSE glue producing nodes FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); void *IP = nullptr; @@ -6037,9 +6082,174 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (isUndef(Opcode, Ops)) return getUNDEF(VT); + // Handle unary special cases. + if (NumOps == 1) { + SDValue N1 = Ops[0]; + + // Constant fold unary operations with an integer constant operand. Even + // opaque constant will be folded, because the folding of unary operations + // doesn't create new constants with different values. Nevertheless, the + // opaque flag is preserved during folding to prevent future folding with + // other constants. + if (auto *C = dyn_cast<ConstantSDNode>(N1)) { + const APInt &Val = C->getAPIntValue(); + switch (Opcode) { + case ISD::SIGN_EXTEND: + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + case ISD::TRUNCATE: + if (C->isOpaque()) + break; + [[fallthrough]]; + case ISD::ZERO_EXTEND: + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + case ISD::ANY_EXTEND: + // Some targets like RISCV prefer to sign extend some types. 
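
The new `(zext (trunc x)) -> x` fold above fires only when `MaskedValueIsZero` proves the bits above the truncation width are already zero, in which case the round-trip is the identity. In plain integer terms (standalone sketch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // (zext i64 (trunc i32 x)) is just x when the upper 32 bits of x are
    // already known to be zero.
    uint64_t X = 0x00000000DEADBEEFULL; // high half is zero
    uint64_t Roundtrip = (uint64_t)(uint32_t)X;
    assert(Roundtrip == X); // trunc + zext was a no-op

    uint64_t Y = 0x1234567800000000ULL; // high bits set: fold does not apply
    assert((uint64_t)(uint32_t)Y != Y);
    return 0;
  }
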
+ if (TLI->isSExtCheaperThanZExt(N1.getValueType(), VT)) + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + case ISD::ABS: + return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::BITREVERSE: + return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::BSWAP: + return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTPOP: + return getConstant(Val.popcount(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + return getConstant(Val.countl_zero(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + return getConstant(Val.countr_zero(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + APFloat apf(EVTToAPFloatSemantics(VT), + APInt::getZero(VT.getSizeInBits())); + (void)apf.convertFromAPInt(Val, Opcode == ISD::SINT_TO_FP, + APFloat::rmNearestTiesToEven); + return getConstantFP(apf, DL, VT); + } + case ISD::FP16_TO_FP: + case ISD::BF16_TO_FP: { + bool Ignored; + APFloat FPV(Opcode == ISD::FP16_TO_FP ? APFloat::IEEEhalf() + : APFloat::BFloat(), + (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); + + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)FPV.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &Ignored); + return getConstantFP(FPV, DL, VT); + } + case ISD::STEP_VECTOR: + if (SDValue V = FoldSTEP_VECTOR(DL, VT, N1, *this)) + return V; + break; + case ISD::BITCAST: + if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) + return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT); + if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) + return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT); + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); + break; + } + } + + // Constant fold unary operations with a floating point constant operand. + if (auto *C = dyn_cast<ConstantFPSDNode>(N1)) { + APFloat V = C->getValueAPF(); // make copy + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, DL, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, DL, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + return SDValue(); + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + return SDValue(); + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + return SDValue(); + } + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. 
+ (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + &ignored); + return getConstantFP(V, DL, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + bool ignored; + APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT); + // FIXME need to be more flexible about rounding mode. + APFloat::opStatus s = + V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored); + if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual + break; + return getConstant(IntVal, DL, VT); + } + case ISD::FP_TO_FP16: + case ISD::FP_TO_BF16: { + bool Ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(Opcode == ISD::FP_TO_FP16 ? APFloat::IEEEhalf() + : APFloat::BFloat(), + APFloat::rmNearestTiesToEven, &Ignored); + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); + } + case ISD::BITCAST: + if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, + VT); + if (VT == MVT::i16 && C->getValueType(0) == MVT::bf16) + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, + VT); + if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, + VT); + if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); + break; + } + } + + // Early-out if we failed to constant fold a bitcast. + if (Opcode == ISD::BITCAST) + return SDValue(); + } + // Handle binops special cases. if (NumOps == 2) { - if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops[0], Ops[1])) + if (SDValue CFP = foldConstantFPMath(Opcode, DL, VT, Ops)) return CFP; if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) { @@ -6232,11 +6442,17 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, } SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, - EVT VT, SDValue N1, SDValue N2) { + EVT VT, ArrayRef<SDValue> Ops) { + // TODO: Add support for unary/ternary fp opcodes. + if (Ops.size() != 2) + return SDValue(); + // TODO: We don't do any constant folding for strict FP opcodes here, but we // should. That will require dealing with a potentially non-default // rounding mode, checking the "opStatus" return value from the APFloat // math calculations, and possibly other variations. + SDValue N1 = Ops[0]; + SDValue N2 = Ops[1]; ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, /*AllowUndefs*/ false); ConstantFPSDNode *N2CFP = isConstOrConstSplatFP(N2, /*AllowUndefs*/ false); if (N1CFP && N2CFP) { @@ -6597,6 +6813,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } return getBuildVector(VT, DL, Ops); } + + if (N1.getOpcode() == ISD::SPLAT_VECTOR && + isa<ConstantSDNode>(N1.getOperand(0))) + return getNode( + ISD::SPLAT_VECTOR, DL, VT, + SignExtendInReg(N1.getConstantOperandAPInt(0), + N1.getOperand(0).getValueType())); break; } case ISD::FP_TO_SINT_SAT: @@ -6865,7 +7088,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Operand is DELETED_NODE!"); // Perform various simplifications. 
switch (Opcode) { - case ISD::FMA: { + case ISD::FMA: + case ISD::FMAD: { assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); assert(N1.getValueType() == VT && N2.getValueType() == VT && N3.getValueType() == VT && "FMA types must match!"); @@ -6876,7 +7100,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, APFloat V1 = N1CFP->getValueAPF(); const APFloat &V2 = N2CFP->getValueAPF(); const APFloat &V3 = N3CFP->getValueAPF(); - V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); + if (Opcode == ISD::FMAD) { + V1.multiply(V2, APFloat::rmNearestTiesToEven); + V1.add(V3, APFloat::rmNearestTiesToEven); + } else + V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); return getConstantFP(V1, DL, VT); } break; @@ -6998,7 +7226,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; } - // Memoize node if it doesn't produce a flag. + // Memoize node if it doesn't produce a glue result. SDNode *N; SDVTList VTs = getVTList(VT); SDValue Ops[] = {N1, N2, N3}; @@ -7339,7 +7567,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (Value.getNode()) { Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); } @@ -7364,14 +7592,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), + DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), SrcPtrInfo.getWithOffset(SrcOff), VT, commonAlignment(*SrcAlign, SrcOff), SrcMMOFlags, NewAAInfo); OutLoadChains.push_back(Value.getValue(1)); Store = DAG.getTruncStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags, NewAAInfo); OutStoreChains.push_back(Store); } @@ -7508,7 +7736,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Value = DAG.getLoad( VT, dl, Chain, - DAG.getMemBasePlusOffset(Src, TypeSize::Fixed(SrcOff), dl), + DAG.getMemBasePlusOffset(Src, TypeSize::getFixed(SrcOff), dl), SrcPtrInfo.getWithOffset(SrcOff), *SrcAlign, SrcMMOFlags, NewAAInfo); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); @@ -7523,7 +7751,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Store = DAG.getStore( Chain, dl, LoadValues[i], - DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags, NewAAInfo); OutChains.push_back(Store); DstOff += VTSize; @@ -7628,19 +7856,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, } // If this store is smaller than the largest store see whether we can get - // the smaller value for free with a truncate. + // the smaller value for free with a truncate or extract vector element and + // then store. 
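
Folding FMAD with two separate roundings rather than `fusedMultiplyAdd`, as done above, is not pedantry: the two schemes can produce different bits. A classic single-precision witness, assuming IEEE-754 float evaluation and C++17 hex-float literals (standalone sketch, not LLVM code):

  #include <cassert>
  #include <cmath>

  int main() {
    // a = 1 + 2^-12, so a*a = 1 + 2^-11 + 2^-24 needs 25 significand bits
    // and cannot be represented exactly in float (24 bits).
    float A = 1.0f + 0x1.0p-12f;

    // FMAD semantics: round the product, then round the sum. The volatile
    // keeps the compiler from contracting these two operations into an fma.
    volatile float P = A * A;            // rounds to 1 + 2^-11
    float Mad = P - 1.0f;                // 2^-11, the 2^-24 was lost

    // FMA semantics: a single rounding of the exact a*a - 1.
    float Fma = std::fmaf(A, A, -1.0f);  // 2^-11 + 2^-24, exactly

    assert(Mad == 0x1.0p-11f);
    assert(Fma == 0x1.0008p-11f);
    assert(Mad != Fma); // why the two opcodes must be folded differently
    return 0;
  }
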
SDValue Value = MemSetValue; if (VT.bitsLT(LargestVT)) { + unsigned Index; + unsigned NElts = LargestVT.getSizeInBits() / VT.getSizeInBits(); + EVT SVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), NElts); if (!LargestVT.isVector() && !VT.isVector() && TLI.isTruncateFree(LargestVT, VT)) Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); - else + else if (LargestVT.isVector() && !VT.isVector() && + TLI.shallExtractConstSplatVectorElementToStore( + LargestVT.getTypeForEVT(*DAG.getContext()), + VT.getSizeInBits(), Index) && + TLI.isTypeLegal(SVT) && + LargestVT.getSizeInBits() == SVT.getSizeInBits()) { + // A target that can combine store(extractelement VectorTy, Idx) can get + // the smaller value for free. + SDValue TailValue = DAG.getNode(ISD::BITCAST, dl, SVT, MemSetValue); + Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, TailValue, + DAG.getVectorIdxConstant(Index, dl)); + } else Value = getMemsetValue(Src, VT, DAG, dl); } assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore( Chain, dl, Value, - DAG.getMemBasePlusOffset(Dst, TypeSize::Fixed(DstOff), dl), + DAG.getMemBasePlusOffset(Dst, TypeSize::getFixed(DstOff), dl), DstPtrInfo.getWithOffset(DstOff), Alignment, isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone, NewAAInfo); @@ -7714,7 +7957,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = Type::getInt8PtrTy(*getContext()); + Entry.Ty = PointerType::getUnqual(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); @@ -7816,7 +8059,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = Type::getInt8PtrTy(*getContext()); + Entry.Ty = PointerType::getUnqual(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); @@ -7930,8 +8173,6 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, // FIXME: pass in SDLoc CLI.setDebugLoc(dl).setChain(Chain); - ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src); - const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero(); const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO); // Helper function to create an Entry from Node and Type. @@ -7943,16 +8184,16 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, }; // If zeroing out and bzero is present, use it.
- if (SrcIsZero && BzeroName) { + if (isNullConstant(Src) && BzeroName) { TargetLowering::ArgListTy Args; - Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx))); + Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); CLI.setLibCallee( TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx), getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args)); } else { TargetLowering::ArgListTy Args; - Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx))); + Args.push_back(CreateEntry(Dst, PointerType::getUnqual(Ctx))); Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx))); Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), @@ -8124,7 +8365,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && "Opcode is not a memory-accessing opcode!"); - // Memoize the node unless it returns a flag. + // Memoize the node unless it returns a glue result. MemIntrinsicSDNode *N; if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; @@ -9642,6 +9883,27 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]); return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags); } + + if (VTList.VTs[0].isVector() && + VTList.VTs[0].getVectorElementType() == MVT::i1 && + VTList.VTs[1].getVectorElementType() == MVT::i1) { + SDValue F1 = getFreeze(N1); + SDValue F2 = getFreeze(N2); + // {vXi1,vXi1} (u/s)addo(vXi1 x, vXi1 y) -> {xor(x,y),and(x,y)} + if (Opcode == ISD::UADDO || Opcode == ISD::SADDO) + return getNode(ISD::MERGE_VALUES, DL, VTList, + {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2), + getNode(ISD::AND, DL, VTList.VTs[1], F1, F2)}, + Flags); + // {vXi1,vXi1} (u/s)subo(vXi1 x, vXi1 y) -> {xor(x,y),and(~x,y)} + if (Opcode == ISD::USUBO || Opcode == ISD::SSUBO) { + SDValue NotF1 = getNOT(DL, F1, VTList.VTs[0]); + return getNode(ISD::MERGE_VALUES, DL, VTList, + {getNode(ISD::XOR, DL, VTList.VTs[0], F1, F2), + getNode(ISD::AND, DL, VTList.VTs[1], NotF1, F2)}, + Flags); + } + } break; } case ISD::SMUL_LOHI: @@ -9651,6 +9913,28 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, VTList.VTs[0] == Ops[0].getValueType() && VTList.VTs[0] == Ops[1].getValueType() && "Binary operator types must match!"); + // Constant fold. + ConstantSDNode *LHS = dyn_cast<ConstantSDNode>(Ops[0]); + ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ops[1]); + if (LHS && RHS) { + unsigned Width = VTList.VTs[0].getScalarSizeInBits(); + unsigned OutWidth = Width * 2; + APInt Val = LHS->getAPIntValue(); + APInt Mul = RHS->getAPIntValue(); + if (Opcode == ISD::SMUL_LOHI) { + Val = Val.sext(OutWidth); + Mul = Mul.sext(OutWidth); + } else { + Val = Val.zext(OutWidth); + Mul = Mul.zext(OutWidth); + } + Val *= Mul; + + SDValue Hi = + getConstant(Val.extractBits(Width, Width), DL, VTList.VTs[0]); + SDValue Lo = getConstant(Val.trunc(Width), DL, VTList.VTs[0]); + return getNode(ISD::MERGE_VALUES, DL, VTList, {Lo, Hi}, Flags); + } break; } case ISD::FFREXP: { @@ -9724,7 +10008,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, #endif } - // Memoize the node unless it returns a flag. + // Memoize the node unless it returns a glue result.
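
The new `vXi1` overflow-op expansions above are just 1-bit truth tables: addition modulo 2 is XOR, the add carry is AND, and the subtract borrow is ~x & y. A short enumeration confirms all of them (standalone sketch):

  #include <cassert>

  int main() {
    // For 1-bit values: sum mod 2 is XOR, the add-overflow (carry) is AND,
    // and the sub-overflow (borrow) is ~x & y.
    for (int X = 0; X <= 1; ++X)
      for (int Y = 0; Y <= 1; ++Y) {
        int Sum = (X + Y) & 1, Carry = (X + Y) >> 1;
        assert(Sum == (X ^ Y) && Carry == (X & Y));
        int Diff = (X - Y) & 1, Borrow = X < Y;
        assert(Diff == (X ^ Y) && Borrow == (~X & Y & 1));
      }
    return 0;
  }
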
SDNode *N; if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { FoldingSetNodeID ID; @@ -10097,7 +10381,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, /// For IROrder, we keep the smaller of the two SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { + if (NLoc && OptLevel == CodeGenOptLevel::None && OLoc.getDebugLoc() != NLoc) { N->setDebugLoc(DebugLoc()); } unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); @@ -10566,11 +10850,18 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { switch (N.getOpcode()) { default: break; - case ISD::ADD: + case ISD::ADD: { SDValue N0 = N.getOperand(0); SDValue N1 = N.getOperand(1); - if (!isa<ConstantSDNode>(N0) && isa<ConstantSDNode>(N1)) { - uint64_t Offset = N.getConstantOperandVal(1); + if (!isa<ConstantSDNode>(N0)) { + bool RHSConstant = isa<ConstantSDNode>(N1); + uint64_t Offset; + if (RHSConstant) + Offset = N.getConstantOperandVal(1); + // We are not allowed to turn indirect debug values variadic, so + // don't salvage those. + if (!RHSConstant && DV->isIndirect()) + continue; // Rewrite an ADD constant node into a DIExpression. Since we are // performing arithmetic to compute the variable's *value* in the @@ -10579,7 +10870,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { auto *DIExpr = DV->getExpression(); auto NewLocOps = DV->copyLocationOps(); bool Changed = false; - for (size_t i = 0; i < NewLocOps.size(); ++i) { + size_t OrigLocOpsSize = NewLocOps.size(); + for (size_t i = 0; i < OrigLocOpsSize; ++i) { // We're not given a ResNo to compare against because the whole // node is going away. We know that any ISD::ADD only has one // result, so we can assume any node match is using the result. @@ -10587,19 +10879,37 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { NewLocOps[i].getSDNode() != &N) continue; NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo()); - SmallVector<uint64_t, 3> ExprOps; - DIExpression::appendOffset(ExprOps, Offset); - DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true); + if (RHSConstant) { + SmallVector<uint64_t, 3> ExprOps; + DIExpression::appendOffset(ExprOps, Offset); + DIExpr = DIExpression::appendOpsToArg(DIExpr, ExprOps, i, true); + } else { + // Convert to a variadic expression (if not already). + // convertToVariadicExpression() returns a const pointer, so we use + // a temporary const variable here. 
+ const auto *TmpDIExpr = + DIExpression::convertToVariadicExpression(DIExpr); + SmallVector<uint64_t, 3> ExprOps; + ExprOps.push_back(dwarf::DW_OP_LLVM_arg); + ExprOps.push_back(NewLocOps.size()); + ExprOps.push_back(dwarf::DW_OP_plus); + SDDbgOperand RHS = + SDDbgOperand::fromNode(N1.getNode(), N1.getResNo()); + NewLocOps.push_back(RHS); + DIExpr = DIExpression::appendOpsToArg(TmpDIExpr, ExprOps, i, true); + } Changed = true; } (void)Changed; assert(Changed && "Salvage target doesn't use N"); + bool IsVariadic = + DV->isVariadic() || OrigLocOpsSize != NewLocOps.size(); + auto AdditionalDependencies = DV->getAdditionalDependencies(); - SDDbgValue *Clone = getDbgValueList(DV->getVariable(), DIExpr, - NewLocOps, AdditionalDependencies, - DV->isIndirect(), DV->getDebugLoc(), - DV->getOrder(), DV->isVariadic()); + SDDbgValue *Clone = getDbgValueList( + DV->getVariable(), DIExpr, NewLocOps, AdditionalDependencies, + DV->isIndirect(), DV->getDebugLoc(), DV->getOrder(), IsVariadic); ClonedDVs.push_back(Clone); DV->setIsInvalidated(); DV->setIsEmitted(); @@ -10607,6 +10917,41 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { N0.getNode()->dumprFull(this); dbgs() << " into " << *DIExpr << '\n'); } + break; + } + case ISD::TRUNCATE: { + SDValue N0 = N.getOperand(0); + TypeSize FromSize = N0.getValueSizeInBits(); + TypeSize ToSize = N.getValueSizeInBits(0); + + DIExpression *DbgExpression = DV->getExpression(); + auto ExtOps = DIExpression::getExtOps(FromSize, ToSize, false); + auto NewLocOps = DV->copyLocationOps(); + bool Changed = false; + for (size_t i = 0; i < NewLocOps.size(); ++i) { + if (NewLocOps[i].getKind() != SDDbgOperand::SDNODE || + NewLocOps[i].getSDNode() != &N) + continue; + + NewLocOps[i] = SDDbgOperand::fromNode(N0.getNode(), N0.getResNo()); + DbgExpression = DIExpression::appendOpsToArg(DbgExpression, ExtOps, i); + Changed = true; + } + assert(Changed && "Salvage target doesn't use N"); + (void)Changed; + + SDDbgValue *Clone = + getDbgValueList(DV->getVariable(), DbgExpression, NewLocOps, + DV->getAdditionalDependencies(), DV->isIndirect(), + DV->getDebugLoc(), DV->getOrder(), DV->isVariadic()); + + ClonedDVs.push_back(Clone); + DV->setIsInvalidated(); + DV->setIsEmitted(); + LLVM_DEBUG(dbgs() << "SALVAGE: Rewriting"; N0.getNode()->dumprFull(this); + dbgs() << " into " << *DbgExpression << '\n'); + break; + } } } @@ -12110,6 +12455,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, // FIXME: This does not work for vectors with elements less than 8 bits. while (VecWidth > 8) { + // If we can't split in half, stop here. + if (VecWidth & 1) + break; + unsigned HalfSize = VecWidth / 2; APInt HighValue = SplatValue.extractBits(HalfSize, HalfSize); APInt LowValue = SplatValue.extractBits(HalfSize, 0); @@ -12127,6 +12476,12 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef, VecWidth = HalfSize; } + // FIXME: The loop above only tries to split in halves. But if the input + // vector for example is <3 x i16> it wouldn't be able to detect a + // SplatBitSize of 16. No idea if that is a design flaw currently limiting + // optimizations. I guess that back in the days when this helper was created + // vectors were normally power-of-2 sized.
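
The halving loop in `isConstantSplat`, including the new odd-width early-out, is easy to model: keep splitting the pattern while both halves agree and report the width at which they stop agreeing. A 64-bit toy version (standalone sketch, stopping at 8 bits just like the FIXME above describes):

  #include <cassert>
  #include <cstdint>

  // Smallest splat element width of a 64-bit pattern, found by repeated
  // halving. A 64-bit toy can never hit the odd-width early-out, but the
  // check mirrors the real loop.
  static unsigned splatBitSize(uint64_t V) {
    unsigned Width = 64;
    while (Width > 8) {
      if (Width & 1) break; // can't split an odd width in half
      unsigned Half = Width / 2;
      uint64_t Mask = (1ULL << Half) - 1;
      uint64_t Lo = V & Mask, Hi = (V >> Half) & Mask;
      if (Lo != Hi) break;  // halves differ: Width is the answer
      V = Lo;
      Width = Half;
    }
    return Width;
  }

  int main() {
    // 0xAA repeated: reported as 8, since the helper never goes below 8 bits.
    assert(splatBitSize(0xAAAAAAAAAAAAAAAAULL) == 8);
    assert(splatBitSize(0x1234123412341234ULL) == 16); // 16-bit splat
    assert(splatBitSize(0x0123456789ABCDEFULL) == 64); // not a splat
    return 0;
  }
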
+ SplatBitSize = VecWidth; return true; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index a432d8e92bca..39a1e09e83c5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -130,7 +130,7 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); // If the bases are the same frame index but we couldn't find a constant // offset (the indices are different), be conservative. - if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) || + if (A->getIndex() != B->getIndex() && (!MFI.isFixedObjectIndex(A->getIndex()) || !MFI.isFixedObjectIndex(B->getIndex()))) { IsAlias = false; return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9595da9d0d8a..4fd76d012a16 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -76,6 +76,7 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" @@ -989,15 +990,15 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); } -void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, +void RegsForValue::AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + InlineAsm::Flag Flag(Code, Regs.size()); if (HasMatching) - Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + Flag.setMatchingOp(MatchingIdx); else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) { // Put the register class of the virtual registers in the flag word. That // way, later passes can recompute register class constraints for inline @@ -1006,13 +1007,13 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, // from the def. const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); - Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + Flag.setRegClass(RC->getID()); } SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); Ops.push_back(Res); - if (Code == InlineAsm::Kind_Clobber) { + if (Code == InlineAsm::Kind::Clobber) { // Clobbers should always have a 1:1 mapping with registers, and may // reference registers that have illegal (e.g. vector) types. Hence, we // shouldn't try to apply any sort of splitting logic to them. @@ -1147,12 +1148,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { return updateRoot(PendingExports); } -void SelectionDAGBuilder::visit(const Instruction &I) { - // Set up outgoing PHI node register values before emitting the terminator. - if (I.isTerminator()) { - HandlePHINodesInSuccessorBlocks(I.getParent()); - } - +void SelectionDAGBuilder::visitDbgInfo(const Instruction &I) { // Add SDDbgValue nodes for any var locs here. Do so before updating // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}.
if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) { @@ -1168,11 +1164,57 @@ void SelectionDAGBuilder::visit(const Instruction &I) { } SmallVector<Value *> Values(It->Values.location_ops()); if (!handleDebugValue(Values, Var, It->Expr, It->DL, SDNodeOrder, - It->Values.hasArgList())) - addDanglingDebugInfo(It, SDNodeOrder); + It->Values.hasArgList())) { + SmallVector<Value *, 4> Vals; + for (Value *V : It->Values.location_ops()) + Vals.push_back(V); + addDanglingDebugInfo(Vals, + FnVarLocs->getDILocalVariable(It->VariableID), + It->Expr, Vals.size() > 1, It->DL, SDNodeOrder); + } } } + // Handle any debug-info attached to this instruction in the form of + // DPValue non-instruction debug-info records. + for (DPValue &DPV : I.getDbgValueRange()) { + DILocalVariable *Variable = DPV.getVariable(); + DIExpression *Expression = DPV.getExpression(); + dropDanglingDebugInfo(Variable, Expression); + + // A DPValue with no locations is a kill location. + SmallVector<Value *, 4> Values(DPV.location_ops()); + if (Values.empty()) { + handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(), + SDNodeOrder); + continue; + } + + // A DPValue with an undef or absent location is also a kill location. + if (llvm::any_of(Values, + [](Value *V) { return !V || isa<UndefValue>(V); })) { + handleKillDebugValue(Variable, Expression, DPV.getDebugLoc(), + SDNodeOrder); + continue; + } + + bool IsVariadic = DPV.hasArgList(); + if (!handleDebugValue(Values, Variable, Expression, DPV.getDebugLoc(), + SDNodeOrder, IsVariadic)) { + addDanglingDebugInfo(Values, Variable, Expression, IsVariadic, + DPV.getDebugLoc(), SDNodeOrder); + } + } +} + +void SelectionDAGBuilder::visit(const Instruction &I) { + visitDbgInfo(I); + + // Set up outgoing PHI node register values before emitting the terminator. + if (I.isTerminator()) { + HandlePHINodesInSuccessorBlocks(I.getParent()); + } + // Increase the SDNodeOrder if dealing with a non-debug instruction. if (!isa<DbgInfoIntrinsic>(I)) ++SDNodeOrder; @@ -1231,14 +1273,12 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG, DILocalVariable *Variable, DebugLoc DL, unsigned Order, - RawLocationWrapper Values, + SmallVectorImpl<Value *> &Values, DIExpression *Expression) { - if (!Values.hasArgList()) - return false; // For variadic dbg_values we will now insert an undef. // FIXME: We can potentially recover these! SmallVector<SDDbgOperand, 2> Locs; - for (const Value *V : Values.location_ops()) { + for (const Value *V : Values) { auto *Undef = UndefValue::get(V->getType()); Locs.push_back(SDDbgOperand::fromConst(Undef)); } @@ -1249,44 +1289,31 @@ static bool handleDanglingVariadicDebugInfo(SelectionDAG &DAG, return true; } -void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc, - unsigned Order) { - if (!handleDanglingVariadicDebugInfo( - DAG, - const_cast<DILocalVariable *>(DAG.getFunctionVarLocs() - ->getVariable(VarLoc->VariableID) - .getVariable()), - VarLoc->DL, Order, VarLoc->Values, VarLoc->Expr)) { - DanglingDebugInfoMap[VarLoc->Values.getVariableLocationOp(0)].emplace_back( - VarLoc, Order); - } -} - -void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI, +void SelectionDAGBuilder::addDanglingDebugInfo(SmallVectorImpl<Value *> &Values, + DILocalVariable *Var, + DIExpression *Expr, + bool IsVariadic, DebugLoc DL, unsigned Order) { - // We treat variadic dbg_values differently at this stage.
- if (!handleDanglingVariadicDebugInfo( - DAG, DI->getVariable(), DI->getDebugLoc(), Order, - DI->getWrappedLocation(), DI->getExpression())) { - // TODO: Dangling debug info will eventually either be resolved or produce - // an Undef DBG_VALUE. However in the resolution case, a gap may appear - // between the original dbg.value location and its resolved DBG_VALUE, - // which we should ideally fill with an extra Undef DBG_VALUE. - assert(DI->getNumVariableLocationOps() == 1 && - "DbgValueInst without an ArgList should have a single location " - "operand."); - DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, Order); + if (IsVariadic) { + handleDanglingVariadicDebugInfo(DAG, Var, DL, Order, Values, Expr); + return; } + // TODO: Dangling debug info will eventually either be resolved or produce + // an Undef DBG_VALUE. However in the resolution case, a gap may appear + // between the original dbg.value location and its resolved DBG_VALUE, + // which we should ideally fill with an extra Undef DBG_VALUE. + assert(Values.size() == 1); + DanglingDebugInfoMap[Values[0]].emplace_back(Var, Expr, DL, Order); } void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, const DIExpression *Expr) { auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { - DIVariable *DanglingVariable = DDI.getVariable(DAG.getFunctionVarLocs()); + DIVariable *DanglingVariable = DDI.getVariable(); DIExpression *DanglingExpr = DDI.getExpression(); if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) { - LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(DDI) - << "\n"); + LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " + << printDDI(nullptr, DDI) << "\n"); return true; } return false; @@ -1299,7 +1326,7 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, // whether it can be salvaged. for (auto &DDI : DDIV) if (isMatchingDbgValue(DDI)) - salvageUnresolvedDbgValue(DDI); + salvageUnresolvedDbgValue(DDIMI.first, DDI); erase_if(DDIV, isMatchingDbgValue); } @@ -1318,7 +1345,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DebugLoc DL = DDI.getDebugLoc(); unsigned ValSDNodeOrder = Val.getNode()->getIROrder(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - DILocalVariable *Variable = DDI.getVariable(DAG.getFunctionVarLocs()); + DILocalVariable *Variable = DDI.getVariable(); DIExpression *Expr = DDI.getExpression(); assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -1332,8 +1359,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, // calling EmitFuncArgumentDbgValue here. 
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL, FuncArgumentDbgValueKind::Value, Val)) { - LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(DDI) - << "\n"); + LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " + << printDDI(V, DDI) << "\n"); LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump()); // Increase the SDNodeOrder for the DbgValue here to make sure it is // inserted after the definition of Val when emitting the instructions @@ -1347,9 +1374,11 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, false); } else LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " - << printDDI(DDI) << " in EmitFuncArgumentDbgValue\n"); + << printDDI(V, DDI) + << " in EmitFuncArgumentDbgValue\n"); } else { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(DDI) << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(V, DDI) + << "\n"); auto Undef = UndefValue::get(V->getType()); auto SDV = DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder); @@ -1359,14 +1388,14 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DDIV.clear(); } -void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { +void SelectionDAGBuilder::salvageUnresolvedDbgValue(const Value *V, + DanglingDebugInfo &DDI) { // TODO: For the variadic implementation, instead of only checking the fail // state of `handleDebugValue`, we need know specifically which values were // invalid, so that we attempt to salvage only those values when processing // a DIArgList. - Value *V = DDI.getVariableLocationOp(0); - Value *OrigV = V; - DILocalVariable *Var = DDI.getVariable(DAG.getFunctionVarLocs()); + const Value *OrigV = V; + DILocalVariable *Var = DDI.getVariable(); DIExpression *Expr = DDI.getExpression(); DebugLoc DL = DDI.getDebugLoc(); unsigned SDOrder = DDI.getSDNodeOrder(); @@ -1383,11 +1412,12 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // a non-instruction is seen, such as a constant expression or global // variable. FIXME: Further work could recover those too. while (isa<Instruction>(V)) { - Instruction &VAsInst = *cast<Instruction>(V); + const Instruction &VAsInst = *cast<const Instruction>(V); // Temporary "0", awaiting real implementation. SmallVector<uint64_t, 16> Ops; SmallVector<Value *, 4> AdditionalValues; - V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops, + V = salvageDebugInfoImpl(const_cast<Instruction &>(VAsInst), + Expr->getNumLocationOperands(), Ops, AdditionalValues); // If we cannot salvage any further, and haven't yet found a suitable debug // expression, bail out. @@ -1420,8 +1450,8 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { auto *Undef = UndefValue::get(OrigV->getType()); auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); DAG.AddDbgValue(SDV, false); - LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(DDI) - << "\n"); + LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " + << printDDI(OrigV, DDI) << "\n"); } void SelectionDAGBuilder::handleKillDebugValue(DILocalVariable *Var, @@ -1571,7 +1601,7 @@ void SelectionDAGBuilder::resolveOrClearDbgInfo() { // Try to fixup any remaining dangling debug info -- and drop it if we can't. 
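In short, the dangling-debug-info flow these hunks rework (function names as used in this file):

// 1. visitDbgInfo(): handleDebugValue() fails because the described Value
//    has no SDNode yet, so addDanglingDebugInfo() records
//    {Variable, Expression, DebugLoc, SDNodeOrder} keyed by that Value.
// 2. resolveDanglingDebugInfo(): once the Value is materialized, the saved
//    record becomes a real SDDbgValue with an adjusted node order.
// 3. Whatever is still dangling reaches the loop below, where
//    salvageUnresolvedDbgValue() walks the IR to rebuild an expression or,
//    failing that, emits an undef DBG_VALUE.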
for (auto &Pair : DanglingDebugInfoMap) for (auto &DDI : Pair.second) - salvageUnresolvedDbgValue(DDI); + salvageUnresolvedDbgValue(const_cast<Value *>(Pair.first), DDI); clearDanglingDebugInfo(); } @@ -1738,6 +1768,12 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const auto *NC = dyn_cast<NoCFIValue>(C)) return getValue(NC->getGlobalValue()); + if (VT == MVT::aarch64svcount) { + assert(C->isNullValue() && "Can only zero this target type!"); + return DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, + DAG.getConstant(0, getCurSDLoc(), MVT::nxv16i1)); + } + VectorType *VecTy = cast<VectorType>(V->getType()); // Now that we know the number and type of the elements, get that number of @@ -1822,7 +1858,7 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { // If this is not a fall-through branch or optimizations are switched off, // emit the branch. if (TargetMBB != NextBlock(FuncInfo.MBB) || - TM.getOptLevel() == CodeGenOpt::None) + TM.getOptLevel() == CodeGenOptLevel::None) DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(TargetMBB))); return; @@ -2049,7 +2085,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // An aggregate return value cannot wrap around the address space, so // offsets to its parts don't wrap either. SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, - TypeSize::Fixed(Offsets[i])); + TypeSize::getFixed(Offsets[i])); SDValue Val = RetOp.getValue(RetOp.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) @@ -2478,7 +2514,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is not a fall-through branch or optimizations are switched off, // emit the branch. - if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) { + if (Succ0MBB != NextBlock(BrMBB) || + TM.getOptLevel() == CodeGenOptLevel::None) { auto Br = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB)); setValue(&I, Br); @@ -2662,14 +2699,13 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, /// visitJumpTable - Emit JumpTable node in the current MBB void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) { // Emit the code for the jump table + assert(JT.SL && "Should set SDLoc for SelectionDAG!"); assert(JT.Reg != -1U && "Should lower JT Header first!"); EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); - SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), - JT.Reg, PTy); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), *JT.SL, JT.Reg, PTy); SDValue Table = DAG.getJumpTable(JT.JTI, PTy); - SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), - MVT::Other, Index.getValue(1), - Table, Index); + SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, *JT.SL, MVT::Other, + Index.getValue(1), Table, Index); DAG.setRoot(BrJumpTable); } @@ -2678,7 +2714,8 @@ void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) { void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB) { - SDLoc dl = getCurSDLoc(); + assert(JT.SL && "Should set SDLoc for SelectionDAG!"); + const SDLoc &dl = *JT.SL; // Subtract the lowest switch case value from the value being switched on. 
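In scalar terms, visitJumpTableHeader plus visitJumpTable lower a dense switch roughly as the sketch below shows (First, Range, Targets, and DefaultDest are stand-ins for JumpTableHeader fields and the emitted table, not real APIs):

// Rough scalar equivalent of visitJumpTableHeader + visitJumpTable.
void lowerDenseSwitch(unsigned SwitchValue, unsigned First, unsigned Range,
                      void (*const Targets[])(), void (*DefaultDest)()) {
  unsigned Index = SwitchValue - First; // the subtraction noted above
  if (Index > Range) {                  // range check; suppressed when the
    DefaultDest();                      // default is provably unreachable
    return;                             // (see FallthroughUnreachable later)
  }
  Targets[Index](); // the ISD::BR_JT indirect branch through the table
}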
SDValue SwitchOp = getValue(JTH.SValue); @@ -2775,7 +2812,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); const Module &M = *ParentBB->getParent()->getFunction().getParent(); Align Align = - DAG.getDataLayout().getPrefTypeAlign(Type::getInt8PtrTy(M.getContext())); + DAG.getDataLayout().getPrefTypeAlign(PointerType::get(M.getContext(), 0)); // Generate code to load the content of the guard slot. SDValue GuardVal = DAG.getLoad( @@ -3225,14 +3262,9 @@ void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { // We may be able to ignore unreachable behind a noreturn call. if (DAG.getTarget().Options.NoTrapAfterNoreturn) { - const BasicBlock &BB = *I.getParent(); - if (&I != &BB.front()) { - BasicBlock::const_iterator PredI = - std::prev(BasicBlock::const_iterator(&I)); - if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) { - if (Call->doesNotReturn()) - return; - } + if (const CallInst *Call = dyn_cast_or_null<CallInst>(I.getPrevNode())) { + if (Call->doesNotReturn()) + return; } } @@ -3466,7 +3498,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { } if (!IsUnaryAbs && Opc != ISD::DELETED_NODE && - (TLI.isOperationLegalOrCustom(Opc, VT) || + (TLI.isOperationLegalOrCustomOrPromote(Opc, VT) || (UseScalarMinMax && TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && // If the underlying comparison instruction is used by any other @@ -3522,9 +3554,23 @@ void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), - I.getType()); - setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); + auto &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + SDNodeFlags Flags; + if (auto *PNI = dyn_cast<PossiblyNonNegInst>(&I)) + Flags.setNonNeg(PNI->hasNonNeg()); + + // Eagerly use nonneg information to canonicalize towards sign_extend if + // that is the target's preference. + // TODO: Let the target do this later. + if (Flags.hasNonNeg() && + TLI.isSExtCheaperThanZExt(N.getValueType(), DestVT)) { + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); + return; + } + + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N, Flags)); } void SelectionDAGBuilder::visitSExt(const User &I) { @@ -4111,7 +4157,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), I.getAddressSpace()); + EVT IntPtr = TLI.getPointerTy(DL, I.getAddressSpace()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -4120,10 +4166,12 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { DAG.getVScale(dl, IntPtr, APInt(IntPtr.getScalarSizeInBits(), TySize.getKnownMinValue()))); - else - AllocSize = - DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, - DAG.getConstant(TySize.getFixedValue(), dl, IntPtr)); + else { + SDValue TySizeValue = + DAG.getConstant(TySize.getFixedValue(), dl, MVT::getIntegerVT(64)); + AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, AllocSize, + DAG.getZExtOrTrunc(TySizeValue, dl, IntPtr)); + } // Handle alignment. 
If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -4156,6 +4204,18 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects()); } +static const MDNode *getRangeMetadata(const Instruction &I) { + // If !noundef is not present, then !range violation results in a poison + // value rather than immediate undefined behavior. In theory, transferring + // these annotations to SDAG is fine, but in practice there are key SDAG + // transforms that are known not to be poison-safe, such as folding logical + // and/or to bitwise and/or. For now, only transfer !range if !noundef is + // also present. + if (!I.hasMetadata(LLVMContext::MD_noundef)) + return nullptr; + return I.getMetadata(LLVMContext::MD_range); +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -4180,7 +4240,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); SmallVector<EVT, 4> ValueVTs, MemVTs; - SmallVector<uint64_t, 4> Offsets; + SmallVector<TypeSize, 4> Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets, 0); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) @@ -4188,7 +4248,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Align Alignment = I.getAlign(); AAMDNodes AAInfo = I.getAAMetadata(); - const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(I); bool isVolatile = I.isVolatile(); MachineMemOperand::Flags MMOFlags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo); @@ -4219,14 +4279,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (isVolatile) Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); - // An aggregate load cannot wrap around the address space, so offsets to its - // parts don't wrap either. - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - SmallVector<SDValue, 4> Values(NumValues); SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); - EVT PtrVT = Ptr.getValueType(); unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { @@ -4243,13 +4297,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, dl, - PtrVT, Ptr, - DAG.getConstant(Offsets[i], dl, PtrVT), - Flags); - SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, - MachinePointerInfo(SV, Offsets[i]), Alignment, + // TODO: MachinePointerInfo only supports a fixed length offset. + MachinePointerInfo PtrInfo = + !Offsets[i].isScalable() || Offsets[i].isZero() + ? 
MachinePointerInfo(SV, Offsets[i].getKnownMinValue()) + : MachinePointerInfo(); + + SDValue A = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]); + SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A, PtrInfo, Alignment, MMOFlags, AAInfo, Ranges); Chains[ChainI] = L.getValue(1); @@ -4351,7 +4407,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { } SmallVector<EVT, 4> ValueVTs, MemVTs; - SmallVector<uint64_t, 4> Offsets; + SmallVector<TypeSize, 4> Offsets; ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), SrcV->getType(), ValueVTs, &MemVTs, &Offsets, 0); unsigned NumValues = ValueVTs.size(); @@ -4372,11 +4428,6 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); - // An aggregate load cannot wrap around the address space, so offsets to its - // parts don't wrap either. - SDNodeFlags Flags; - Flags.setNoUnsignedWrap(true); - unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. @@ -4386,14 +4437,19 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { Root = Chain; ChainI = 0; } - SDValue Add = - DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(Offsets[i]), dl, Flags); + + // TODO: MachinePointerInfo only supports a fixed length offset. + MachinePointerInfo PtrInfo = + !Offsets[i].isScalable() || Offsets[i].isZero() + ? MachinePointerInfo(PtrV, Offsets[i].getKnownMinValue()) + : MachinePointerInfo(); + + SDValue Add = DAG.getObjectPtrOffset(dl, Ptr, Offsets[i]); SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i); if (MemVTs[i] != ValueVTs[i]) Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]); SDValue St = - DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]), - Alignment, MMOFlags, AAInfo); + DAG.getStore(Root, dl, Val, Add, PtrInfo, Alignment, MMOFlags, AAInfo); Chains[ChainI] = St; } @@ -4607,7 +4663,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { Alignment = DAG.getEVTAlign(VT); AAMDNodes AAInfo = I.getAAMetadata(); - const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(I); // Do not serialize masked loads of constant memory with anything. MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); @@ -4641,7 +4697,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { ->getMaybeAlignValue() .value_or(DAG.getEVTAlign(VT.getScalarType())); - const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(I); SDValue Root = DAG.getRoot(); SDValue Base; @@ -4801,23 +4857,6 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); SDValue Ptr = getValue(I.getPointerOperand()); - - if (TLI.lowerAtomicLoadAsLoadSDNode(I)) { - // TODO: Once this is better exercised by tests, it should be merged with - // the normal path for loads to prevent future divergence. 
- SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO); - if (MemVT != VT) - L = DAG.getPtrExtOrTrunc(L, dl, VT); - - setValue(&I, L); - SDValue OutChain = L.getValue(1); - if (!I.isUnordered()) - DAG.setRoot(OutChain); - else - PendingLoads.push_back(OutChain); - return; - } - SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain, Ptr, MMO); @@ -4857,16 +4896,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT); SDValue Ptr = getValue(I.getPointerOperand()); - if (TLI.lowerAtomicStoreAsStoreSDNode(I)) { - // TODO: Once this is better exercised by tests, it should be merged with - // the normal path for stores to prevent future divergence. - SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO); - setValue(&I, S); - DAG.setRoot(S); - return; - } - SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, - Ptr, Val, MMO); + SDValue OutChain = + DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Val, Ptr, MMO); setValue(&I, OutChain); DAG.setRoot(OutChain); @@ -5821,26 +5852,6 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (!Op) return false; - // If the expression refers to the entry value of an Argument, use the - // corresponding livein physical register. As per the Verifier, this is only - // allowed for swiftasync Arguments. - if (Op->isReg() && Expr->isEntryValue()) { - assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync)); - auto OpReg = Op->getReg(); - for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins()) - if (OpReg == VirtReg || OpReg == PhysReg) { - SDDbgValue *SDV = DAG.getVRegDbgValue( - Variable, Expr, PhysReg, - Kind != FuncArgumentDbgValueKind::Value /*is indirect*/, DL, - SDNodeOrder); - DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/); - return true; - } - LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but " - "couldn't find a physical register\n"); - return true; - } - assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); MachineInstr *NewMI = nullptr; @@ -5929,6 +5940,41 @@ static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) { llvm_unreachable("expected corresponding call to preallocated setup/arg"); } +/// If DI is a debug value with an EntryValue expression, lower it using the +/// corresponding physical register of the associated Argument value +/// (guaranteed to exist by the verifier). +bool SelectionDAGBuilder::visitEntryValueDbgValue(const DbgValueInst &DI) { + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expr = DI.getExpression(); + if (!Expr->isEntryValue() || !hasSingleElement(DI.getValues())) + return false; + + // These properties are guaranteed by the verifier. 
+ Argument *Arg = cast<Argument>(DI.getValue(0)); + assert(Arg->hasAttribute(Attribute::AttrKind::SwiftAsync)); + + auto ArgIt = FuncInfo.ValueMap.find(Arg); + if (ArgIt == FuncInfo.ValueMap.end()) { + LLVM_DEBUG( + dbgs() << "Dropping dbg.value: expression is entry_value but " + "couldn't find an associated register for the Argument\n"); + return true; + } + Register ArgVReg = ArgIt->getSecond(); + + for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins()) + if (ArgVReg == VirtReg || ArgVReg == PhysReg) { + SDDbgValue *SDV = + DAG.getVRegDbgValue(Variable, Expr, PhysReg, false /*IsIndirect*/, + DI.getDebugLoc(), SDNodeOrder); + DAG.AddDbgValue(SDV, false /*treat as dbg.declare byval parameter*/); + return true; + } + LLVM_DEBUG(dbgs() << "Dropping dbg.value: expression is entry_value but " + "couldn't find a physical register\n"); + return true; +} + /// Lower the call to the specified intrinsic function. void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { @@ -6258,6 +6304,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DIExpression *Expression = DI.getExpression(); dropDanglingDebugInfo(Variable, Expression); + if (visitEntryValueDbgValue(DI)) + return; + if (DI.isKillLocation()) { handleKillDebugValue(Variable, Expression, DI.getDebugLoc(), SDNodeOrder); return; @@ -6270,7 +6319,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool IsVariadic = DI.hasArgList(); if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(), SDNodeOrder, IsVariadic)) - addDanglingDebugInfo(&DI, SDNodeOrder); + addDanglingDebugInfo(Values, Variable, Expression, IsVariadic, + DI.getDebugLoc(), SDNodeOrder); return; } @@ -6383,6 +6433,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::fabs: case Intrinsic::sin: case Intrinsic::cos: + case Intrinsic::exp10: case Intrinsic::floor: case Intrinsic::ceil: case Intrinsic::trunc: @@ -6398,6 +6449,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::fabs: Opcode = ISD::FABS; break; case Intrinsic::sin: Opcode = ISD::FSIN; break; case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::exp10: Opcode = ISD::FEXP10; break; case Intrinsic::floor: Opcode = ISD::FFLOOR; break; case Intrinsic::ceil: Opcode = ISD::FCEIL; break; case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; @@ -6657,6 +6709,25 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::reset_fpenv: DAG.setRoot(DAG.getNode(ISD::RESET_FPENV, sdl, MVT::Other, getRoot())); return; + case Intrinsic::get_fpmode: + Res = DAG.getNode( + ISD::GET_FPMODE, sdl, + DAG.getVTList(TLI.getValueType(DAG.getDataLayout(), I.getType()), + MVT::Other), + DAG.getRoot()); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return; + case Intrinsic::set_fpmode: + Res = DAG.getNode(ISD::SET_FPMODE, sdl, MVT::Other, {DAG.getRoot()}, + getValue(I.getArgOperand(0))); + DAG.setRoot(Res); + return; + case Intrinsic::reset_fpmode: { + Res = DAG.getNode(ISD::RESET_FPMODE, sdl, MVT::Other, getRoot()); + DAG.setRoot(Res); + return; + } case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); @@ -7041,15 +7112,18 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, auto Flags = rw == 0 ?
MachineMemOperand::MOLoad :MachineMemOperand::MOStore; Ops[0] = DAG.getRoot(); Ops[1] = getValue(I.getArgOperand(0)); - Ops[2] = getValue(I.getArgOperand(1)); - Ops[3] = getValue(I.getArgOperand(2)); - Ops[4] = getValue(I.getArgOperand(3)); + Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl, + MVT::i32); + Ops[3] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(2)), sdl, + MVT::i32); + Ops[4] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(3)), sdl, + MVT::i32); SDValue Result = DAG.getMemIntrinsicNode( ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), /* align */ std::nullopt, Flags); - // Chain the prefetch in parallell with any pending loads, to stay out of + // Chain the prefetch in parallel with any pending loads, to stay out of // the way of later optimizations. PendingLoads.push_back(Result); Result = getRoot(); @@ -7060,7 +7134,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::lifetime_end: { bool IsStart = (Intrinsic == Intrinsic::lifetime_start); // Stack coloring is not enabled in O0, discard region information. - if (TM.getOptLevel() == CodeGenOpt::None) + if (TM.getOptLevel() == CodeGenOptLevel::None) return; const int64_t ObjectSize = @@ -7145,6 +7219,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, llvm_unreachable("instrprof failed to lower a timestamp"); case Intrinsic::instrprof_value_profile: llvm_unreachable("instrprof failed to lower a value profiling call"); + case Intrinsic::instrprof_mcdc_parameters: + llvm_unreachable("instrprof failed to lower mcdc parameters"); + case Intrinsic::instrprof_mcdc_tvbitmap_update: + llvm_unreachable("instrprof failed to lower an mcdc tvbitmap update"); + case Intrinsic::instrprof_mcdc_condbitmap_update: + llvm_unreachable("instrprof failed to lower an mcdc condbitmap update"); case Intrinsic::localescape: { MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); @@ -7372,13 +7452,62 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Val); return; } + case Intrinsic::amdgcn_cs_chain: { + assert(I.arg_size() == 5 && "Additional args not supported yet"); + assert(cast<ConstantInt>(I.getOperand(4))->isZero() && + "Non-zero flags not supported yet"); + + // At this point we don't care if it's amdgpu_cs_chain or + // amdgpu_cs_chain_preserve. + CallingConv::ID CC = CallingConv::AMDGPU_CS_Chain; + + Type *RetTy = I.getType(); + assert(RetTy->isVoidTy() && "Should not return"); + + SDValue Callee = getValue(I.getOperand(0)); + + // We only have 2 actual args: one for the SGPRs and one for the VGPRs. + // We'll also tack the value of the EXEC mask at the end. 
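For reference, the operand layout implied by the asserts and uses in this hunk (inferred from the code below; the intrinsic's signature itself is not shown in this diff):

// I.getOperand(0) -> callee      (lowered separately via getValue above)
// I.getOperand(1) -> EXEC mask   (hence last in {2, 3, 1}; forced inreg)
// I.getOperand(2) -> SGPR args   (must already be marked inreg)
// I.getOperand(3) -> VGPR args   (must not be marked inreg)
// I.getOperand(4) -> flags       (asserted to be zero for now)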
+ TargetLowering::ArgListTy Args; + Args.reserve(3); + + for (unsigned Idx : {2, 3, 1}) { + TargetLowering::ArgListEntry Arg; + Arg.Node = getValue(I.getOperand(Idx)); + Arg.Ty = I.getOperand(Idx)->getType(); + Arg.setAttributes(&I, Idx); + Args.push_back(Arg); + } + + assert(Args[0].IsInReg && "SGPR args should be marked inreg"); + assert(!Args[1].IsInReg && "VGPR args should not be marked inreg"); + Args[2].IsInReg = true; // EXEC should be inreg + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()) + .setChain(getRoot()) + .setCallee(CC, RetTy, Callee, std::move(Args)) + .setNoReturn(true) + .setTailCall(true) + .setConvergent(I.isConvergent()); + CLI.CB = &I; + std::pair<SDValue, SDValue> Result = + lowerInvokable(CLI, /*EHPadBB*/ nullptr); + (void)Result; + assert(!Result.first.getNode() && !Result.second.getNode() && + "Should've lowered as tail call"); + + HasTailCall = true; + return; + } case Intrinsic::ptrmask: { SDValue Ptr = getValue(I.getOperand(0)); - SDValue Const = getValue(I.getOperand(1)); + SDValue Mask = getValue(I.getOperand(1)); EVT PtrVT = Ptr.getValueType(); - setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, - DAG.getZExtOrTrunc(Const, sdl, PtrVT))); + assert(PtrVT == Mask.getValueType() && + "Pointers with different index type are not supported by SDAG"); + setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask)); return; } case Intrinsic::threadlocal_address: { @@ -7396,7 +7525,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } SDValue TripCount = getValue(I.getOperand(1)); - auto VecTy = CCVT.changeVectorElementType(ElementVT); + EVT VecTy = EVT::getVectorVT(*DAG.getContext(), ElementVT, + CCVT.getVectorElementCount()); SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index); SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount); @@ -7442,6 +7572,62 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, setValue(&I, Trunc); return; } + case Intrinsic::experimental_cttz_elts: { + auto DL = getCurSDLoc(); + SDValue Op = getValue(I.getOperand(0)); + EVT OpVT = Op.getValueType(); + + if (!TLI.shouldExpandCttzElements(OpVT)) { + visitTargetIntrinsic(I, Intrinsic); + return; + } + + if (OpVT.getScalarType() != MVT::i1) { + // Compare the input vector elements to zero & use to count trailing zeros + SDValue AllZero = DAG.getConstant(0, DL, OpVT); + OpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + OpVT.getVectorElementCount()); + Op = DAG.getSetCC(DL, OpVT, Op, AllZero, ISD::SETNE); + } + + // Find the smallest "sensible" element type to use for the expansion. + ConstantRange CR( + APInt(64, OpVT.getVectorElementCount().getKnownMinValue())); + if (OpVT.isScalableVT()) + CR = CR.umul_sat(getVScaleRange(I.getCaller(), 64)); + + // If the zero-is-poison flag is set, we can assume the upper limit + // of the result is VF-1. 
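The guard just below implements that VF-1 clamp. As a scalar model of the step-vector/umax expansion that follows (fixed VF of 4, hypothetical helper name; the real code builds SDValues, not scalars):

#include <algorithm>

unsigned cttzEltsModel(const bool Mask[4]) {
  const unsigned VL = 4;
  unsigned Max = 0;
  for (unsigned i = 0; i < VL; ++i) {
    unsigned StepVL = VL - i;            // SplatVL - StepVec
    unsigned And = Mask[i] ? StepVL : 0; // sign-extended Mask AND StepVL
    Max = std::max(Max, And);            // VECREDUCE_UMAX
  }
  return VL - Max; // index of the first set lane, or VL when none is set
}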
+ if (!cast<ConstantSDNode>(getValue(I.getOperand(1)))->isZero()) + CR = CR.subtract(APInt(64, 1)); + + unsigned EltWidth = I.getType()->getScalarSizeInBits(); + EltWidth = std::min(EltWidth, (unsigned)CR.getActiveBits()); + EltWidth = std::max(llvm::bit_ceil(EltWidth), (unsigned)8); + + MVT NewEltTy = MVT::getIntegerVT(EltWidth); + + // Create the new vector type & get the vector length + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltTy, + OpVT.getVectorElementCount()); + + SDValue VL = + DAG.getElementCount(DL, NewEltTy, OpVT.getVectorElementCount()); + + SDValue StepVec = DAG.getStepVector(DL, NewVT); + SDValue SplatVL = DAG.getSplat(NewVT, DL, VL); + SDValue StepVL = DAG.getNode(ISD::SUB, DL, NewVT, SplatVL, StepVec); + SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, Op); + SDValue And = DAG.getNode(ISD::AND, DL, NewVT, StepVL, Ext); + SDValue Max = DAG.getNode(ISD::VECREDUCE_UMAX, DL, NewEltTy, And); + SDValue Sub = DAG.getNode(ISD::SUB, DL, NewEltTy, VL, Max); + + EVT RetTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); + SDValue Ret = DAG.getZExtOrTrunc(Sub, DL, RetTy); + + setValue(&I, Ret); + return; + } case Intrinsic::vector_insert: { SDValue Vec = getValue(I.getOperand(0)); SDValue SubVec = getValue(I.getOperand(1)); @@ -7645,7 +7831,7 @@ void SelectionDAGBuilder::visitVPLoad( Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); - const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(VPIntrin); SDValue LD; // Do not serialize variable-length loads of constant memory with // anything. @@ -7672,7 +7858,7 @@ void SelectionDAGBuilder::visitVPGather( Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); - const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(VPIntrin); SDValue LD; if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); @@ -7779,7 +7965,7 @@ void SelectionDAGBuilder::visitVPStridedLoad( if (!Alignment) Alignment = DAG.getEVTAlign(VT.getScalarType()); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); - const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + const MDNode *Ranges = getRangeMetadata(VPIntrin); MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); bool AddToChain = !AA || !AA->pointsToConstantMemory(ML); SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); @@ -7929,6 +8115,16 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( } break; } + case ISD::VP_IS_FPCLASS: { + const DataLayout DLayout = DAG.getDataLayout(); + EVT DestVT = TLI.getValueType(DLayout, VPIntrin.getType()); + auto Constant = cast<ConstantSDNode>(OpValues[1])->getZExtValue(); + SDValue Check = DAG.getTargetConstant(Constant, DL, MVT::i32); + SDValue V = DAG.getNode(ISD::VP_IS_FPCLASS, DL, DestVT, + {OpValues[0], Check, OpValues[2], OpValues[3]}); + setValue(&VPIntrin, V); + return; + } case ISD::VP_INTTOPTR: { SDValue N = OpValues[0]; EVT DestVT = TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType()); @@ -8660,6 +8856,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FEXP2)) return; break; + case LibFunc_exp10: + case LibFunc_exp10f: + case LibFunc_exp10l: + if (visitUnaryFloatCall(I, ISD::FEXP10)) + return; + break; case LibFunc_ldexp: case LibFunc_ldexpf: case LibFunc_ldexpl: @@ -8955,11 +9157,11 @@ findMatchingInlineAsmOperand(unsigned OperandNo, // Advance to the next operand. unsigned OpFlag = cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - assert((InlineAsm::isRegDefKind(OpFlag) || - InlineAsm::isRegDefEarlyClobberKind(OpFlag) || - InlineAsm::isMemKind(OpFlag)) && - "Skipped past definitions?"); - CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1; + const InlineAsm::Flag F(OpFlag); + assert( + (F.isRegDefKind() || F.isRegDefEarlyClobberKind() || F.isMemKind()) && + "Skipped past definitions?"); + CurOp += F.getNumOperandRegisters() + 1; } return CurOp; } @@ -9217,14 +9419,14 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, switch (OpInfo.Type) { case InlineAsm::isOutput: if (OpInfo.ConstraintType == TargetLowering::C_Memory) { - unsigned ConstraintID = + const InlineAsm::ConstraintCode ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); - assert(ConstraintID != InlineAsm::Constraint_Unknown && + assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM node to know about this output. - unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + InlineAsm::Flag OpFlags(InlineAsm::Kind::Mem, 1); + OpFlags.setMemConstraint(ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), MVT::i32)); AsmNodeOperands.push_back(OpInfo.CallOperand); @@ -9245,8 +9447,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // Add information to the INLINEASM node to know that this register is // set. OpInfo.AssignedRegs.AddInlineAsmOperands( - OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber - : InlineAsm::Kind_RegDef, + OpInfo.isEarlyClobber ? InlineAsm::Kind::RegDefEarlyClobber + : InlineAsm::Kind::RegDef, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); } break; @@ -9260,11 +9462,9 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // just use its register. auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(), AsmNodeOperands); - unsigned OpFlag = - cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); - if (InlineAsm::isRegDefKind(OpFlag) || - InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { - // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 
+ InlineAsm::Flag Flag( + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue()); + if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) { if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c emitInlineAsmError(Call, "inline asm not supported yet: " @@ -9284,8 +9484,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, TiedReg.isVirtual() ? MRI.getRegClass(TiedReg) : RegVT != MVT::Untyped ? TLI.getRegClassFor(RegVT) : TRI.getMinimalPhysRegClass(TiedReg); - unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); - for (unsigned i = 0; i != NumRegs; ++i) + for (unsigned i = 0, e = Flag.getNumOperandRegisters(); i != e; ++i) Regs.push_back(MRI.createVirtualRegister(RC)); RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType()); @@ -9293,22 +9492,21 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, SDLoc dl = getCurSDLoc(); // Use the produced MatchedRegs object to copy the input value into the // matched registers. MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call); - MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, - true, OpInfo.getMatchedOperand(), dl, - DAG, AsmNodeOperands); + MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, true, + OpInfo.getMatchedOperand(), dl, DAG, + AsmNodeOperands); break; } - assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); - assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && + assert(Flag.isMemKind() && "Unknown matching constraint!"); + assert(Flag.getNumOperandRegisters() == 1 && "Unexpected number of operands"); // Add information to the INLINEASM node to know about this input. // See InlineAsm.h isUseOperandTiedToDef. - OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); - OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, - OpInfo.getMatchedOperand()); + Flag.clearMemConstraint(); + Flag.setMatchingOp(OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant( - OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); + Flag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -9338,8 +9536,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, } // Add information to the INLINEASM node to know about this input. - unsigned ResOpType = - InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); + InlineAsm::Flag ResOpType(InlineAsm::Kind::Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant( ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); llvm::append_range(AsmNodeOperands, Ops); break; } @@ -9354,14 +9551,14 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, TLI.getPointerTy(DAG.getDataLayout()) && "Memory operands expect pointer values"); - unsigned ConstraintID = + const InlineAsm::ConstraintCode ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); - assert(ConstraintID != InlineAsm::Constraint_Unknown && + assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && "Failed to convert memory constraint code to constraint id."); // Add information to the INLINEASM node to know about this input.
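All of these inline-asm hunks follow one migration: the free functions that packed raw flag words (getFlagWord, getFlagWordForMem, and friends) give way to the typed InlineAsm::Flag wrapper. A minimal sketch of the encoding side, using only calls that appear in this diff (header path assumed):

#include "llvm/IR/InlineAsm.h"
using namespace llvm;

// Previously: unsigned F = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
//             F = InlineAsm::getFlagWordForMem(F, ConstraintID);
// Now the same "memory operand, one register, with a constraint" reads:
InlineAsm::Flag encodeMemOperand(InlineAsm::ConstraintCode ConstraintID) {
  InlineAsm::Flag Flag(InlineAsm::Kind::Mem, 1);
  Flag.setMemConstraint(ConstraintID);
  return Flag; // still lowers to the i32 stored on the INLINEASM node
}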
- unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); - ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1); + ResOpType.setMemConstraint(ConstraintID); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); @@ -9370,24 +9567,24 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, } if (OpInfo.ConstraintType == TargetLowering::C_Address) { - unsigned ConstraintID = + const InlineAsm::ConstraintCode ConstraintID = TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); - assert(ConstraintID != InlineAsm::Constraint_Unknown && + assert(ConstraintID != InlineAsm::ConstraintCode::Unknown && "Failed to convert memory constraint code to constraint id."); - unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + InlineAsm::Flag ResOpType(InlineAsm::Kind::Mem, 1); SDValue AsmOp = InOperandVal; if (isFunction(InOperandVal)) { auto *GA = cast<GlobalAddressSDNode>(InOperandVal); - ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1); + ResOpType = InlineAsm::Flag(InlineAsm::Kind::Func, 1); AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(), InOperandVal.getValueType(), GA->getOffset()); } // Add information to the INLINEASM node to know about this input. - ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + ResOpType.setMemConstraint(ConstraintID); AsmNodeOperands.push_back( DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); @@ -9425,15 +9622,15 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, Chain, &Glue, &Call); - OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, - dl, DAG, AsmNodeOperands); + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::RegUse, false, + 0, dl, DAG, AsmNodeOperands); break; } case InlineAsm::isClobber: // Add the clobbered value to the operand list, so that the register // allocator is aware that the physreg got clobbered. 
if (!OpInfo.AssignedRegs.Regs.empty()) - OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind::Clobber, false, 0, getCurSDLoc(), DAG, AsmNodeOperands); break; @@ -9626,7 +9823,7 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op) { - const MDNode *Range = I.getMetadata(LLVMContext::MD_range); + const MDNode *Range = getRangeMetadata(I); if (!Range) return Op; @@ -9670,7 +9867,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, void SelectionDAGBuilder::populateCallLoweringInfo( TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy, - bool IsPatchPoint) { + AttributeSet RetAttrs, bool IsPatchPoint) { TargetLowering::ArgListTy Args; Args.reserve(NumArgs); @@ -9691,7 +9888,8 @@ void SelectionDAGBuilder::populateCallLoweringInfo( CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) - .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args)) + .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args), + RetAttrs) .setDiscardResult(Call->use_empty()) .setIsPatchPoint(IsPatchPoint) .setIsPreallocated( @@ -9840,7 +10038,7 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, TargetLowering::CallLoweringInfo CLI(DAG); populateCallLoweringInfo(CLI, &CB, NumMetaOpers, NumCallArgs, Callee, - ReturnTy, true); + ReturnTy, CB.getAttributes().getRetAttrs(), true); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); @@ -11234,7 +11432,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - if (TM.getOptLevel() != CodeGenOpt::None) { + if (TM.getOptLevel() != CodeGenOptLevel::None) { // Here, we order cases by probability so the most likely case will be // checked first. However, two clusters can have the same probability in // which case their relative ordering is non-deterministic. So we use Low @@ -11310,8 +11508,32 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - if (FallthroughUnreachable) - JTH->FallthroughUnreachable = true; + // If the default clause is unreachable, propagate that knowledge into + // JTH->FallthroughUnreachable which will use it to suppress the range + // check. + // + // However, don't do this if we're doing branch target enforcement, + // because a table branch _without_ a range check can be a tempting JOP + // gadget - out-of-bounds inputs that are impossible in correct + // execution become possible again if an attacker can influence the + // control flow. So if an attacker doesn't already have a BTI bypass + // available, we don't want them to be able to get one out of this + // table branch. 
+ if (FallthroughUnreachable) { + Function &CurFunc = CurMF->getFunction(); + bool HasBranchTargetEnforcement = false; + if (CurFunc.hasFnAttribute("branch-target-enforcement")) { + HasBranchTargetEnforcement = + CurFunc.getFnAttribute("branch-target-enforcement") + .getValueAsBool(); + } else { + HasBranchTargetEnforcement = + CurMF->getMMI().getModule()->getModuleFlag( + "branch-target-enforcement"); + } + if (!HasBranchTargetEnforcement) + JTH->FallthroughUnreachable = true; + } if (!JTH->FallthroughUnreachable) addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); @@ -11568,7 +11790,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster( MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Don't perform if there is only one cluster or optimizing for size. if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 || - TM.getOptLevel() == CodeGenOpt::None || + TM.getOptLevel() == CodeGenOptLevel::None || SwitchMBB->getParent()->getFunction().hasMinSize()) return SwitchMBB; @@ -11652,7 +11874,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { return; } - SL->findJumpTables(Clusters, &SI, DefaultMBB, DAG.getPSI(), DAG.getBFI()); + SL->findJumpTables(Clusters, &SI, getCurSDLoc(), DefaultMBB, DAG.getPSI(), + DAG.getBFI()); SL->findBitTestClusters(Clusters, &SI); LLVM_DEBUG({ @@ -11690,7 +11913,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { SwitchWorkListItem W = WorkList.pop_back_val(); unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; - if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None && + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOptLevel::None && !DefaultMBB->getParent()->getFunction().hasMinSize()) { // For optimized builds, lower large range as a balanced binary tree. splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index f2496f24973a..5b55c3461b0b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -106,54 +106,39 @@ class SelectionDAGBuilder { /// Helper type for DanglingDebugInfoMap. 
class DanglingDebugInfo { - using DbgValTy = const DbgValueInst *; - using VarLocTy = const VarLocInfo *; - PointerUnion<DbgValTy, VarLocTy> Info; unsigned SDNodeOrder = 0; public: + DILocalVariable *Variable; + DIExpression *Expression; + DebugLoc dl; DanglingDebugInfo() = default; - DanglingDebugInfo(const DbgValueInst *DI, unsigned SDNO) - : Info(DI), SDNodeOrder(SDNO) {} - DanglingDebugInfo(const VarLocInfo *VarLoc, unsigned SDNO) - : Info(VarLoc), SDNodeOrder(SDNO) {} + DanglingDebugInfo(DILocalVariable *Var, DIExpression *Expr, DebugLoc DL, + unsigned SDNO) + : SDNodeOrder(SDNO), Variable(Var), Expression(Expr), + dl(std::move(DL)) {} - DILocalVariable *getVariable(const FunctionVarLocs *Locs) const { - if (isa<VarLocTy>(Info)) - return Locs->getDILocalVariable(cast<VarLocTy>(Info)->VariableID); - return cast<DbgValTy>(Info)->getVariable(); - } - DIExpression *getExpression() const { - if (isa<VarLocTy>(Info)) - return cast<VarLocTy>(Info)->Expr; - return cast<DbgValTy>(Info)->getExpression(); - } - Value *getVariableLocationOp(unsigned Idx) const { - assert(Idx == 0 && "Dangling variadic debug values not supported yet"); - if (isa<VarLocTy>(Info)) - return cast<VarLocTy>(Info)->Values.getVariableLocationOp(Idx); - return cast<DbgValTy>(Info)->getVariableLocationOp(Idx); - } - DebugLoc getDebugLoc() const { - if (isa<VarLocTy>(Info)) - return cast<VarLocTy>(Info)->DL; - return cast<DbgValTy>(Info)->getDebugLoc(); - } + DILocalVariable *getVariable() const { return Variable; } + DIExpression *getExpression() const { return Expression; } + DebugLoc getDebugLoc() const { return dl; } unsigned getSDNodeOrder() const { return SDNodeOrder; } /// Helper for printing DanglingDebugInfo. This hoop-jumping is to - /// accommodate the fact that an argument is required for getVariable. + /// store a Value pointer, so that we can print a whole DDI as one object. /// Call SelectionDAGBuilder::printDDI instead of using directly. struct Print { - Print(const DanglingDebugInfo &DDI, const FunctionVarLocs *VarLocs) - : DDI(DDI), VarLocs(VarLocs) {} + Print(const Value *V, const DanglingDebugInfo &DDI) : V(V), DDI(DDI) {} + const Value *V; const DanglingDebugInfo &DDI; - const FunctionVarLocs *VarLocs; friend raw_ostream &operator<<(raw_ostream &OS, const DanglingDebugInfo::Print &P) { - OS << "DDI(var=" << *P.DDI.getVariable(P.VarLocs) - << ", val= " << *P.DDI.getVariableLocationOp(0) - << ", expr=" << *P.DDI.getExpression() + OS << "DDI(var=" << *P.DDI.getVariable(); + if (P.V) + OS << ", val=" << *P.V; + else + OS << ", val=nullptr"; + + OS << ", expr=" << *P.DDI.getExpression() << ", order=" << P.DDI.getSDNodeOrder() << ", loc=" << P.DDI.getDebugLoc() << ")"; return OS; @@ -164,8 +149,9 @@ class SelectionDAGBuilder { /// Returns an object that defines `raw_ostream &operator<<` for printing. /// Usage example: //// errs() << printDDI(MyValue, MyDanglingInfo) << " is dangling\n"; - DanglingDebugInfo::Print printDDI(const DanglingDebugInfo &DDI) { - return DanglingDebugInfo::Print(DDI, DAG.getFunctionVarLocs()); + DanglingDebugInfo::Print printDDI(const Value *V, + const DanglingDebugInfo &DDI) { + return DanglingDebugInfo::Print(V, DDI); } /// Helper type for DanglingDebugInfoMap.
@@ -295,10 +281,10 @@ public: LLVMContext *Context = nullptr; SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, - SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol) + SwiftErrorValueTracking &swifterror, CodeGenOptLevel ol) : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag), - SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), - SwiftError(swifterror) {} + SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), + FuncInfo(funcinfo), SwiftError(swifterror) {} void init(GCFunctionInfo *gfi, AAResults *AA, AssumptionCache *AC, const TargetLibraryInfo *li); @@ -344,6 +330,7 @@ public: ISD::NodeType ExtendType = ISD::ANY_EXTEND); void visit(const Instruction &I); + void visitDbgInfo(const Instruction &I); void visit(unsigned Opcode, const User &I); @@ -352,8 +339,9 @@ public: SDValue getCopyFromRegs(const Value *V, Type *Ty); /// Register a dbg_value which relies on a Value which we have not yet seen. - void addDanglingDebugInfo(const DbgValueInst *DI, unsigned Order); - void addDanglingDebugInfo(const VarLocInfo *VarLoc, unsigned Order); + void addDanglingDebugInfo(SmallVectorImpl<Value *> &Values, + DILocalVariable *Var, DIExpression *Expr, + bool IsVariadic, DebugLoc DL, unsigned Order); /// If we have dangling debug info that describes \p Variable, or an /// overlapping part of variable considering the \p Expr, then this method @@ -368,7 +356,7 @@ public: /// For the given dangling debuginfo record, perform last-ditch efforts to /// resolve the debuginfo to something that is represented in this DAG. If /// this cannot be done, produce an Undef debug value record. - void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI); + void salvageUnresolvedDbgValue(const Value *V, DanglingDebugInfo &DDI); /// For a given list of Values, attempt to create and record a SDDbgValue in /// the SelectionDAG. @@ -426,7 +414,8 @@ public: void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI, const CallBase *Call, unsigned ArgIdx, unsigned NumArgs, SDValue Callee, - Type *ReturnTy, bool IsPatchPoint); + Type *ReturnTy, AttributeSet RetAttrs, + bool IsPatchPoint); std::pair<SDValue, SDValue> lowerInvokable(TargetLowering::CallLoweringInfo &CLI, @@ -625,6 +614,8 @@ private: void visitInlineAsm(const CallBase &Call, const BasicBlock *EHPadBB = nullptr); + + bool visitEntryValueDbgValue(const DbgValueInst &I); void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); @@ -785,7 +776,7 @@ struct RegsForValue { /// Add this value to the specified inlineasm node operand list. This adds the /// code marker, matching input operand index (if applicable), and includes /// the number of values added into it. 
- void AddInlineAsmOperands(unsigned Code, bool HasMatching, + void AddInlineAsmOperands(InlineAsm::Kind Code, bool HasMatching, unsigned MatchingIdx, const SDLoc &dl, SelectionDAG &DAG, std::vector<SDValue> &Ops) const; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 03a1ead5bbb4..78cc60084068 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -125,6 +125,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; case ISD::FrameIndex: return "FrameIndex"; case ISD::JumpTable: return "JumpTable"; + case ISD::JUMP_TABLE_DEBUG_INFO: + return "JUMP_TABLE_DEBUG_INFO"; case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; case ISD::RETURNADDR: return "RETURNADDR"; case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR"; @@ -222,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::STRICT_FEXP: return "strict_fexp"; case ISD::FEXP2: return "fexp2"; case ISD::STRICT_FEXP2: return "strict_fexp2"; + case ISD::FEXP10: return "fexp10"; case ISD::FLOG: return "flog"; case ISD::STRICT_FLOG: return "strict_flog"; case ISD::FLOG2: return "flog2"; @@ -439,6 +442,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::RESET_FPENV: return "reset_fpenv"; case ISD::GET_FPENV_MEM: return "get_fpenv_mem"; case ISD::SET_FPENV_MEM: return "set_fpenv_mem"; + case ISD::GET_FPMODE: return "get_fpmode"; + case ISD::SET_FPMODE: return "set_fpmode"; + case ISD::RESET_FPMODE: return "reset_fpmode"; // Bit manipulation case ISD::ABS: return "abs"; @@ -591,6 +597,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { if (getFlags().hasExact()) OS << " exact"; + if (getFlags().hasNonNeg()) + OS << " nneg"; + if (getFlags().hasNoNaNs()) OS << " nnan"; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 35abd990f968..2018b5f0ee29 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -78,6 +78,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/PrintPasses.h" #include "llvm/IR/Statepoint.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" @@ -113,6 +114,7 @@ using namespace llvm; #define DEBUG_TYPE "isel" +#define ISEL_DUMP_DEBUG_TYPE DEBUG_TYPE "-dump" STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); @@ -180,6 +182,19 @@ static const bool ViewDAGCombine1 = false, ViewLegalizeTypesDAGs = false, ViewSchedDAGs = false, ViewSUnitDAGs = false; #endif +#ifndef NDEBUG +#define ISEL_DUMP(X) \ + do { \ + if (llvm::DebugFlag && \ + (isCurrentDebugType(DEBUG_TYPE) || \ + (isCurrentDebugType(ISEL_DUMP_DEBUG_TYPE) && MatchFilterFuncName))) { \ + X; \ + } \ + } while (false) +#else +#define ISEL_DUMP(X) do { } while (false) +#endif + //===---------------------------------------------------------------------===// /// /// RegisterScheduler class - Track the registration of instruction schedulers. 
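The ISEL_DUMP macro introduced above layers a second debug type ("isel-dump", filtered by function name) on top of the usual DEBUG_TYPE gate. A stripped-down sketch of the same gated-dump pattern; the plain bools here stand in for llvm::DebugFlag, isCurrentDebugType(), and MatchFilterFuncName:

```cpp
#include <cstdio>

static bool DebugFlag = true;
static bool IselDebugType = false;  // stands in for -debug-only=isel
static bool IselDumpType = true;    // stands in for -debug-only=isel-dump
static bool MatchFilterFuncName = true;

#define MY_ISEL_DUMP(X)                                                       \
  do {                                                                        \
    if (DebugFlag &&                                                          \
        (IselDebugType || (IselDumpType && MatchFilterFuncName))) {           \
      X;                                                                      \
    }                                                                         \
  } while (false)

int main() {
  // The do {} while (false) wrapper makes the macro one statement, so it
  // nests safely under an unbraced if/else, exactly like LLVM_DEBUG.
  if (DebugFlag)
    MY_ISEL_DUMP(std::printf("=== dumping selection DAG ===\n"));
  else
    std::printf("debug output disabled\n");
}
```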
@@ -204,6 +219,16 @@ static RegisterScheduler defaultListDAGScheduler("default", "Best scheduler for the target", createDefaultScheduler); +static bool dontUseFastISelFor(const Function &Fn) { + // Don't enable FastISel for functions with swiftasync Arguments. + // Debug info on those is reliant on good Argument lowering, and FastISel is + // not capable of lowering the entire function. Mixing the two selectors tends + // to result in poor lowering of Arguments. + return any_of(Fn.args(), [](const Argument &Arg) { + return Arg.hasAttribute(Attribute::AttrKind::SwiftAsync); + }); +} + namespace llvm { //===--------------------------------------------------------------------===// @@ -211,29 +236,31 @@ namespace llvm { /// the optimization level on a per-function basis. class OptLevelChanger { SelectionDAGISel &IS; - CodeGenOpt::Level SavedOptLevel; + CodeGenOptLevel SavedOptLevel; bool SavedFastISel; public: - OptLevelChanger(SelectionDAGISel &ISel, - CodeGenOpt::Level NewOptLevel) : IS(ISel) { + OptLevelChanger(SelectionDAGISel &ISel, CodeGenOptLevel NewOptLevel) + : IS(ISel) { SavedOptLevel = IS.OptLevel; SavedFastISel = IS.TM.Options.EnableFastISel; - if (NewOptLevel == SavedOptLevel) - return; - IS.OptLevel = NewOptLevel; - IS.TM.setOptLevel(NewOptLevel); - LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function " - << IS.MF->getFunction().getName() << "\n"); - LLVM_DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel << " ; After: -O" - << NewOptLevel << "\n"); - if (NewOptLevel == CodeGenOpt::None) { - IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); - LLVM_DEBUG( - dbgs() << "\tFastISel is " - << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") - << "\n"); + if (NewOptLevel != SavedOptLevel) { + IS.OptLevel = NewOptLevel; + IS.TM.setOptLevel(NewOptLevel); + LLVM_DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction().getName() << "\n"); + LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(SavedOptLevel) + << " ; After: -O" << static_cast<int>(NewOptLevel) + << "\n"); + if (NewOptLevel == CodeGenOptLevel::None) + IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); } + if (dontUseFastISelFor(IS.MF->getFunction())) + IS.TM.setFastISel(false); + LLVM_DEBUG( + dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") + << "\n"); } ~OptLevelChanger() { @@ -241,8 +268,8 @@ namespace llvm { return; LLVM_DEBUG(dbgs() << "\nRestoring optimization level for Function " << IS.MF->getFunction().getName() << "\n"); - LLVM_DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel << " ; After: -O" - << SavedOptLevel << "\n"); + LLVM_DEBUG(dbgs() << "\tBefore: -O" << static_cast<int>(IS.OptLevel) + << " ; After: -O" << static_cast<int>(SavedOptLevel) << "\n"); IS.OptLevel = SavedOptLevel; IS.TM.setOptLevel(SavedOptLevel); IS.TM.setFastISel(SavedFastISel); @@ -252,8 +279,8 @@ namespace llvm { //===--------------------------------------------------------------------===// /// createDefaultScheduler - This creates an instruction scheduler appropriate /// for the target.
- ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { + ScheduleDAGSDNodes *createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOptLevel OptLevel) { const TargetLowering *TLI = IS->TLI; const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); @@ -262,7 +289,7 @@ namespace llvm { return SchedulerCtor(IS, OptLevel); } - if (OptLevel == CodeGenOpt::None || + if (OptLevel == CodeGenOptLevel::None || (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); @@ -315,7 +342,7 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, //===----------------------------------------------------------------------===// SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm, - CodeGenOpt::Level OL) + CodeGenOptLevel OL) : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), SwiftError(new SwiftErrorValueTracking()), CurDAG(new SelectionDAG(tm, OL)), @@ -335,23 +362,23 @@ SelectionDAGISel::~SelectionDAGISel() { } void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { - if (OptLevel != CodeGenOpt::None) - AU.addRequired<AAResultsWrapperPass>(); + if (OptLevel != CodeGenOptLevel::None) + AU.addRequired<AAResultsWrapperPass>(); AU.addRequired<GCModuleInfo>(); AU.addRequired<StackProtector>(); AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); AU.addRequired<AssumptionCacheTracker>(); - if (UseMBPI && OptLevel != CodeGenOpt::None) - AU.addRequired<BranchProbabilityInfoWrapperPass>(); + if (UseMBPI && OptLevel != CodeGenOptLevel::None) + AU.addRequired<BranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for // the module. AU.addRequired<AssignmentTrackingAnalysis>(); AU.addPreserved<AssignmentTrackingAnalysis>(); - if (OptLevel != CodeGenOpt::None) - LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); + if (OptLevel != CodeGenOptLevel::None) + LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -391,6 +418,13 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const Function &Fn = mf.getFunction(); MF = &mf; +#ifndef NDEBUG + StringRef FuncName = Fn.getName(); + MatchFilterFuncName = isFunctionInPrintList(FuncName); +#else + (void)MatchFilterFuncName; +#endif + // Decide what flavour of variable location debug-info will be used, before // we change the optimisation level. bool InstrRef = mf.shouldUseDebugInstrRef(); @@ -403,9 +437,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // it wants to look at it. TM.resetTargetOptions(Fn); // Reset OptLevel to None for optnone functions. 
- CodeGenOpt::Level NewOptLevel = OptLevel; - if (OptLevel != CodeGenOpt::None && skipFunction(Fn)) - NewOptLevel = CodeGenOpt::None; + CodeGenOptLevel NewOptLevel = OptLevel; + if (OptLevel != CodeGenOptLevel::None && skipFunction(Fn)) + NewOptLevel = CodeGenOptLevel::None; OptLevelChanger OLC(*this, NewOptLevel); TII = MF->getSubtarget().getInstrInfo(); @@ -417,14 +451,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction()); auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); BlockFrequencyInfo *BFI = nullptr; - if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None) + if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOptLevel::None) BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); FunctionVarLocs const *FnVarLocs = nullptr; if (isAssignmentTrackingEnabled(*Fn.getParent())) FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults(); - LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + ISEL_DUMP(dbgs() << "\n\n\n=== " << FuncName << "\n"); UniformityInfo *UA = nullptr; if (auto *UAPass = getAnalysisIfAvailable<UniformityInfoWrapperPass>()) @@ -438,12 +472,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // into account). That's unfortunate but OK because it just means we won't // ask for passes that have been required anyway. - if (UseMBPI && OptLevel != CodeGenOpt::None) + if (UseMBPI && OptLevel != CodeGenOptLevel::None) FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); else FuncInfo->BPI = nullptr; - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOptLevel::None) AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); else AA = nullptr; @@ -456,7 +490,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // We split CSR if the target supports it for the given function // and the function has only return exits. - if (OptLevel != CodeGenOpt::None && TLI->supportSplitCSR(MF)) { + if (OptLevel != CodeGenOptLevel::None && TLI->supportSplitCSR(MF)) { FuncInfo->SplitCSR = true; // Collect all the return blocks. @@ -656,8 +690,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // at this point. FuncInfo->clear(); - LLVM_DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); - LLVM_DEBUG(MF->print(dbgs())); + ISEL_DUMP(dbgs() << "*** MachineFunction at end of ISel ***\n"); + ISEL_DUMP(MF->print(dbgs())); return true; } @@ -685,10 +719,13 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, CurDAG->NewNodesMustHaveLegalTypes = false; // Lower the instructions. If a call is emitted as a tail call, cease emitting - // nodes for this block. + // nodes for this block. If an instruction is elided, don't emit it, but do + // handle any debug-info attached to it. for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { if (!ElidedArgCopyInstrs.count(&*I)) SDB->visit(*I); + else + SDB->visitDbgInfo(*I); } // Make sure the root of the DAG is up-to-date. 
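The OptLevelChanger OLC constructed above is an RAII guard: the constructor saves the optimization level and FastISel setting and applies the new ones, and the destructor restores them on every exit path out of runOnMachineFunction. A minimal sketch of the same save/restore idiom; the types and the -O0 FastISel policy below are illustrative, not LLVM's:

```cpp
#include <iostream>

enum class OptLevel { None, Less, Default, Aggressive };

struct Settings {
  OptLevel Level = OptLevel::Default;
  bool FastISel = false;
};

class OptLevelGuard {
  Settings &S;
  OptLevel SavedLevel;
  bool SavedFastISel;

public:
  OptLevelGuard(Settings &S, OptLevel NewLevel)
      : S(S), SavedLevel(S.Level), SavedFastISel(S.FastISel) {
    if (NewLevel != SavedLevel) {
      S.Level = NewLevel;
      // Stands in for the getO0WantsFastISel() policy consulted at -O0.
      if (NewLevel == OptLevel::None)
        S.FastISel = true;
    }
  }
  ~OptLevelGuard() { // restored no matter how the scope is left
    S.Level = SavedLevel;
    S.FastISel = SavedFastISel;
  }
};

int main() {
  Settings S;
  {
    OptLevelGuard G(S, OptLevel::None);
    std::cout << "inside: FastISel=" << S.FastISel << "\n"; // inside: 1
  }
  std::cout << "after: FastISel=" << S.FastISel << "\n"; // restored: 0
}
```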
@@ -765,10 +802,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { BlockName = (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); } - LLVM_DEBUG(dbgs() << "Initial selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nInitial selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) @@ -785,10 +822,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(BeforeLegalizeTypes, AA, OptLevel); } - LLVM_DEBUG(dbgs() << "Optimized lowered selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nOptimized lowered selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) @@ -807,10 +844,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - LLVM_DEBUG(dbgs() << "Type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nType-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) @@ -831,10 +868,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeTypes, AA, OptLevel); } - LLVM_DEBUG(dbgs() << "Optimized type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nOptimized type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) @@ -849,10 +886,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } if (Changed) { - LLVM_DEBUG(dbgs() << "Vector-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nVector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) @@ -865,10 +902,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->LegalizeTypes(); } - LLVM_DEBUG(dbgs() << "Vector/type-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nVector/type-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) @@ -885,10 +922,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeVectorOps, AA, OptLevel); } - LLVM_DEBUG(dbgs() << "Optimized vector-legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nOptimized vector-legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) @@ -905,10 +942,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(); } - LLVM_DEBUG(dbgs() << "Legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nLegalized selection DAG: " + << 
printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) @@ -925,17 +962,17 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(AfterLegalizeDAG, AA, OptLevel); } - LLVM_DEBUG(dbgs() << "Optimized legalized selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nOptimized legalized selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); #ifndef NDEBUG if (TTI.hasBranchDivergence()) CurDAG->VerifyDAGDivergence(); #endif - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOptLevel::None) ComputeLiveOutVRegInfo(); if (ViewISelDAGs && MatchFilterBB) @@ -949,10 +986,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - LLVM_DEBUG(dbgs() << "Selected selection DAG: " - << printMBBReference(*FuncInfo->MBB) << " '" << BlockName - << "'\n"; - CurDAG->dump()); + ISEL_DUMP(dbgs() << "\nSelected selection DAG: " + << printMBBReference(*FuncInfo->MBB) << " '" << BlockName + << "'\n"; + CurDAG->dump()); if (ViewSchedDAGs && MatchFilterBB) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -1357,6 +1394,8 @@ static bool processIfEntryValueDbgDeclare(FunctionLoweringInfo &FuncInfo, // Find the corresponding livein physical register to this argument. for (auto [PhysReg, VirtReg] : FuncInfo.RegInfo->liveins()) if (VirtReg == ArgVReg) { + // Append an op deref to account for the fact that this is a dbg_declare. + Expr = DIExpression::append(Expr, dwarf::DW_OP_deref); FuncInfo.MF->setVariableDbgInfo(Var, Expr, PhysReg, DbgLoc); LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var << ", Expr=" << *Expr << ", MCRegister=" << PhysReg @@ -1510,7 +1549,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Iterate over all basic blocks in the function. StackProtector &SP = getAnalysis<StackProtector>(); for (const BasicBlock *LLVMBB : RPOT) { - if (OptLevel != CodeGenOpt::None) { + if (OptLevel != CodeGenOptLevel::None) { bool AllPredsVisited = true; for (const BasicBlock *Pred : predecessors(LLVMBB)) { if (!FuncInfo->VisitedBBs.count(Pred)) { @@ -2074,41 +2113,43 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, --e; // Don't process a glue operand if it is here. while (i != e) { - unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); - if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) { + InlineAsm::Flag Flags(cast<ConstantSDNode>(InOps[i])->getZExtValue()); + if (!Flags.isMemKind() && !Flags.isFuncKind()) { // Just skip over this operand, copying the operands verbatim. - Ops.insert(Ops.end(), InOps.begin()+i, - InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); - i += InlineAsm::getNumOperandRegisters(Flags) + 1; + Ops.insert(Ops.end(), InOps.begin() + i, + InOps.begin() + i + Flags.getNumOperandRegisters() + 1); + i += Flags.getNumOperandRegisters() + 1; } else { - assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && + assert(Flags.getNumOperandRegisters() == 1 && "Memory operand with multiple values?"); unsigned TiedToOperand; - if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { + if (Flags.isUseOperandTiedToDef(TiedToOperand)) { // We need the constraint ID from the operand this is tied to. 
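The hunk above (continuing below) migrates raw flag-word bit twiddling to the typed InlineAsm::Flag wrapper. A generic sketch of the wrapper-over-a-packed-word idea; the bit layout here is made up for illustration and is not LLVM's actual inline-asm operand encoding:

```cpp
#include <cassert>
#include <cstdint>

class FlagWord {
  uint32_t Bits;
  // Illustrative layout: [2:0] kind, [15:3] number of operand registers.
  enum { KindMask = 0x7, NumRegsShift = 3 };

public:
  enum class Kind : uint32_t { Reg = 0, Mem = 1, Func = 2 };

  explicit FlagWord(uint32_t Raw) : Bits(Raw) {}
  FlagWord(Kind K, unsigned NumRegs)
      : Bits(static_cast<uint32_t>(K) | (NumRegs << NumRegsShift)) {}

  Kind getKind() const { return static_cast<Kind>(Bits & KindMask); }
  bool isMemKind() const { return getKind() == Kind::Mem; }
  unsigned getNumOperandRegisters() const { return Bits >> NumRegsShift; }
  uint32_t raw() const { return Bits; }
};

int main() {
  FlagWord F(FlagWord::Kind::Mem, 1);
  assert(F.isMemKind() && F.getNumOperandRegisters() == 1);
  // Round-trips through the raw integer, as the matcher operand lists do.
  FlagWord G(F.raw());
  assert(G.isMemKind());
}
```

Wrapping the word keeps the encoding in one place, so call sites read as queries instead of shift-and-mask arithmetic.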
unsigned CurOp = InlineAsm::Op_FirstOperand; - Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + Flags = + InlineAsm::Flag(cast<ConstantSDNode>(InOps[CurOp])->getZExtValue()); for (; TiedToOperand; --TiedToOperand) { - CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; - Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + CurOp += Flags.getNumOperandRegisters() + 1; + Flags = InlineAsm::Flag( + cast<ConstantSDNode>(InOps[CurOp])->getZExtValue()); } } // Otherwise, this is a memory operand. Ask the target to select it. std::vector<SDValue> SelOps; - unsigned ConstraintID = InlineAsm::getMemoryConstraintID(Flags); + const InlineAsm::ConstraintCode ConstraintID = + Flags.getMemoryConstraintID(); if (SelectInlineAsmMemoryOperand(InOps[i+1], ConstraintID, SelOps)) report_fatal_error("Could not match memory address. Inline asm" " failure!"); // Add this to the output node. - unsigned NewFlags = - InlineAsm::isMemKind(Flags) - ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()) - : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size()); - NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); - Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); + Flags = InlineAsm::Flag(Flags.isMemKind() ? InlineAsm::Kind::Mem + : InlineAsm::Kind::Func, + SelOps.size()); + Flags.setMemConstraint(ConstraintID); + Ops.push_back(CurDAG->getTargetConstant(Flags, DL, MVT::i32)); llvm::append_range(Ops, SelOps); i += 2; } @@ -2176,18 +2217,20 @@ static bool findNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse, /// operand node N of U during instruction selection that starts at Root. bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { - if (OptLevel == CodeGenOpt::None) return false; + if (OptLevel == CodeGenOptLevel::None) + return false; return N.hasOneUse(); } /// IsLegalToFold - Returns true if the specific operand node N of /// U can be folded during instruction selection that starts at Root. bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root, - CodeGenOpt::Level OptLevel, + CodeGenOptLevel OptLevel, bool IgnoreChains) { - if (OptLevel == CodeGenOpt::None) return false; + if (OptLevel == CodeGenOptLevel::None) + return false; - // If Root use can somehow reach N through a path that that doesn't contain + // If Root use can somehow reach N through a path that doesn't contain // U then folding N would create a cycle. e.g. In the following // diagram, Root can reach N through X. If N is folded into Root, then // X is both a predecessor and a successor of U. @@ -2435,6 +2478,13 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { return Val; } +void SelectionDAGISel::Select_JUMP_TABLE_DEBUG_INFO(SDNode *N) { + SDLoc dl(N); + CurDAG->SelectNodeTo(N, TargetOpcode::JUMP_TABLE_DEBUG_INFO, MVT::Glue, + CurDAG->getTargetConstant(N->getConstantOperandVal(1), + dl, MVT::i64, true)); +} + /// When a match is complete, this method updates uses of interior chain results /// to use the new results. void SelectionDAGISel::UpdateChains( @@ -2591,7 +2641,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, unsigned ResNumResults = Res->getNumValues(); // Move the glue if needed. 
if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && - (unsigned)OldGlueResultNo != ResNumResults-1) + static_cast<unsigned>(OldGlueResultNo) != ResNumResults - 1) ReplaceUses(SDValue(Node, OldGlueResultNo), SDValue(Res, ResNumResults - 1)); @@ -2600,7 +2650,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, // Move the chain reference if needed. if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && - (unsigned)OldChainResultNo != ResNumResults-1) + static_cast<unsigned>(OldChainResultNo) != ResNumResults - 1) ReplaceUses(SDValue(Node, OldChainResultNo), SDValue(Res, ResNumResults - 1)); @@ -2639,8 +2689,11 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame( /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, - const SelectionDAGISel &SDISel) { - return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); + const SelectionDAGISel &SDISel, bool TwoBytePredNo) { + unsigned PredNo = MatcherTable[MatcherIndex++]; + if (TwoBytePredNo) + PredNo |= MatcherTable[MatcherIndex++] << 8; + return SDISel.CheckPatternPredicate(PredNo); } /// CheckNodePredicate - Implements OP_CheckNodePredicate. @@ -2654,15 +2707,17 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDNode *N) { uint16_t Opc = MatcherTable[MatcherIndex++]; - Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + Opc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8; return N->getOpcode() == Opc; } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; - if (N.getValueType() == VT) return true; + MVT::SimpleValueType VT = + static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); + if (N.getValueType() == VT) + return true; // Handle the case when VT is iPTR. 
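The matcher-table readers above use two encodings: fixed two-byte little-endian fields (opcodes, and now two-byte pattern-predicate numbers) and VBR varints for larger values, where the high bit of each byte means "more bytes follow". A sketch matching the shape of these decoders, though not byte-for-byte LLVM's GetVBR:

```cpp
#include <cassert>
#include <cstdint>

static uint16_t decode16(const unsigned char *Table, unsigned &Idx) {
  uint16_t V = Table[Idx++];
  V |= static_cast<uint16_t>(Table[Idx++]) << 8; // little-endian high byte
  return V;
}

static uint64_t decodeVBR(uint64_t First, const unsigned char *Table,
                          unsigned &Idx) {
  assert(First & 128 && "caller only continues when the high bit is set");
  uint64_t Val = First & 127;
  unsigned Shift = 7;
  unsigned char NextBits;
  do { // each byte contributes 7 payload bits, high bit is the continuation
    NextBits = Table[Idx++];
    Val |= static_cast<uint64_t>(NextBits & 127) << Shift;
    Shift += 7;
  } while (NextBits & 128);
  return Val;
}

int main() {
  const unsigned char Table[] = {0x34, 0x12, /*second VBR byte of 300:*/ 0x02};
  unsigned Idx = 0;
  assert(decode16(Table, Idx) == 0x1234);
  uint64_t First = 0xAC; // 300 = 0b100101100; low 7 bits 0x2C, plus cont. bit
  assert(decodeVBR(First, Table, Idx) == 300);
}
```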
return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); @@ -2682,7 +2737,7 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N) { return cast<CondCodeSDNode>(N)->get() == - (ISD::CondCode)MatcherTable[MatcherIndex++]; + static_cast<ISD::CondCode>(MatcherTable[MatcherIndex++]); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool @@ -2696,7 +2751,8 @@ CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL) { - MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + MVT::SimpleValueType VT = + static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); if (cast<VTSDNode>(N)->getVT() == VT) return true; @@ -2788,7 +2844,10 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table, Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same); return Index; case SelectionDAGISel::OPC_CheckPatternPredicate: - Result = !::CheckPatternPredicate(Table, Index, SDISel); + case SelectionDAGISel::OPC_CheckPatternPredicate2: + Result = !::CheckPatternPredicate( + Table, Index, SDISel, + Table[Index - 1] == SelectionDAGISel::OPC_CheckPatternPredicate2); return Index; case SelectionDAGISel::OPC_CheckPredicate: Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode()); @@ -2981,6 +3040,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::PATCHPOINT: Select_PATCHPOINT(NodeToMatch); return; + case ISD::JUMP_TABLE_DEBUG_INFO: + Select_JUMP_TABLE_DEBUG_INFO(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); @@ -3042,7 +3104,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // Get the opcode, add the index to the table. uint16_t Opc = MatcherTable[Idx++]; - Opc |= (unsigned short)MatcherTable[Idx++] << 8; + Opc |= static_cast<uint16_t>(MatcherTable[Idx++]) << 8; if (Opc >= OpcodeOffset.size()) OpcodeOffset.resize((Opc+1)*2); OpcodeOffset[Opc] = Idx; @@ -3059,7 +3121,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, #ifndef NDEBUG unsigned CurrentOpcodeIndex = MatcherIndex; #endif - BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; + BuiltinOpcodes Opcode = + static_cast<BuiltinOpcodes>(MatcherTable[MatcherIndex++]); switch (Opcode) { case OPC_Scope: { // Okay, the semantics of this operation are that we should push a scope @@ -3198,7 +3261,10 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; case OPC_CheckPatternPredicate: - if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; + case OPC_CheckPatternPredicate2: + if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this, + Opcode == OPC_CheckPatternPredicate2)) + break; continue; case OPC_CheckPredicate: if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, @@ -3265,7 +3331,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, if (CaseSize == 0) break; uint16_t Opc = MatcherTable[MatcherIndex++]; - Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + Opc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8; // If the opcode matches, then we will execute this case. 
if (CurNodeOpcode == Opc) @@ -3295,7 +3361,8 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); if (CaseSize == 0) break; - MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + MVT CaseVT = + static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); if (CaseVT == MVT::iPTR) CaseVT = TLI->getPointerTy(CurDAG->getDataLayout()); @@ -3390,22 +3457,43 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, continue; } case OPC_EmitInteger: - case OPC_EmitStringInteger: { - MVT::SimpleValueType VT = - (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + case OPC_EmitInteger8: + case OPC_EmitInteger16: + case OPC_EmitInteger32: + case OPC_EmitInteger64: + case OPC_EmitStringInteger: + case OPC_EmitStringInteger32: { + MVT::SimpleValueType VT; + switch (Opcode) { + case OPC_EmitInteger8: + VT = MVT::i8; + break; + case OPC_EmitInteger16: + VT = MVT::i16; + break; + case OPC_EmitInteger32: + case OPC_EmitStringInteger32: + VT = MVT::i32; + break; + case OPC_EmitInteger64: + VT = MVT::i64; + break; + default: + VT = static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); + break; + } int64_t Val = MatcherTable[MatcherIndex++]; if (Val & 128) Val = GetVBR(Val, MatcherTable, MatcherIndex); - if (Opcode == OPC_EmitInteger) + if (Opcode >= OPC_EmitInteger && Opcode <= OPC_EmitInteger64) Val = decodeSignRotatedValue(Val); - RecordedNodes.push_back(std::pair<SDValue, SDNode*>( - CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), - VT), nullptr)); + RecordedNodes.push_back(std::pair<SDValue, SDNode *>( + CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), VT), nullptr)); continue; } case OPC_EmitRegister: { MVT::SimpleValueType VT = - (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); unsigned RegNo = MatcherTable[MatcherIndex++]; RecordedNodes.push_back(std::pair<SDValue, SDNode*>( CurDAG->getRegister(RegNo, VT), nullptr)); @@ -3416,7 +3504,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, // values are stored in two bytes in the matcher table (just like // opcodes). MVT::SimpleValueType VT = - (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); unsigned RegNo = MatcherTable[MatcherIndex++]; RegNo |= MatcherTable[MatcherIndex++] << 8; RecordedNodes.push_back(std::pair<SDValue, SDNode*>( @@ -3562,7 +3650,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case OPC_EmitNode0: case OPC_EmitNode1: case OPC_EmitNode2: case OPC_MorphNodeTo0: case OPC_MorphNodeTo1: case OPC_MorphNodeTo2: { uint16_t TargetOpc = MatcherTable[MatcherIndex++]; - TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + TargetOpc |= static_cast<uint16_t>(MatcherTable[MatcherIndex++]) << 8; unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; // Get the result VT list. 
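The OPC_EmitInteger payloads handled above are sign-rotated before VBR encoding: decodeSignRotatedValue moves the sign into bit 0, so small-magnitude negatives become small unsigned values and stay short under VBR. A sketch of the scheme with a round-trip check; the decode follows decodeSignRotatedValue's semantics, and the encoder is reconstructed here only for the test:

```cpp
#include <cassert>
#include <cstdint>
#include <limits>

static uint64_t encodeSignRotated(int64_t V) {
  if (V >= 0)
    return static_cast<uint64_t>(V) << 1;
  if (V == std::numeric_limits<int64_t>::min())
    return 1; // the "-0" slot is reused as a sentinel for INT64_MIN
  return (static_cast<uint64_t>(-V) << 1) | 1;
}

static int64_t decodeSignRotated(uint64_t V) {
  if ((V & 1) == 0)
    return static_cast<int64_t>(V >> 1); // non-negative
  if (V != 1)
    return -static_cast<int64_t>(V >> 1); // negative, magnitude in V >> 1
  return std::numeric_limits<int64_t>::min();
}

int main() {
  const int64_t Tests[] = {0, 1, -1, 42, -42,
                           std::numeric_limits<int64_t>::max(),
                           std::numeric_limits<int64_t>::min()};
  for (int64_t V : Tests)
    assert(decodeSignRotated(encodeSignRotated(V)) == V);
  assert(encodeSignRotated(-1) == 3); // small negative -> small encoding
}
```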
unsigned NumVTs; @@ -3577,7 +3665,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, SmallVector<EVT, 4> VTs; for (unsigned i = 0; i != NumVTs; ++i) { MVT::SimpleValueType VT = - (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + static_cast<MVT::SimpleValueType>(MatcherTable[MatcherIndex++]); if (VT == MVT::iPTR) VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy; VTs.push_back(VT); @@ -3667,7 +3755,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, auto &Chain = ChainNodesMatched; assert((!E || !is_contained(Chain, N)) && "Chain node replaced during MorphNode"); - llvm::erase_value(Chain, N); + llvm::erase(Chain, N); }); Res = cast<MachineSDNode>(MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo)); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 5afd05648772..cf32350036d4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -62,15 +62,15 @@ STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); STATISTIC(StatepointMaxSlotsRequired, "Maximum number of stack slots required for a singe statepoint"); -cl::opt<bool> UseRegistersForDeoptValues( +static cl::opt<bool> UseRegistersForDeoptValues( "use-registers-for-deopt-values", cl::Hidden, cl::init(false), cl::desc("Allow using registers for non pointer deopt args")); -cl::opt<bool> UseRegistersForGCPointersInLandingPad( +static cl::opt<bool> UseRegistersForGCPointersInLandingPad( "use-registers-for-gc-values-in-landing-pad", cl::Hidden, cl::init(false), cl::desc("Allow using registers for gc pointer in landing pad")); -cl::opt<unsigned> MaxRegistersForGCPointers( +static cl::opt<unsigned> MaxRegistersForGCPointers( "max-registers-for-gc-values", cl::Hidden, cl::init(0), cl::desc("Max number of VRegs allowed to pass GC pointer meta args in")); @@ -525,7 +525,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // deopt argument length, deopt arguments.., gc arguments... // Figure out what lowering strategy we're going to use for each part - // Note: Is is conservatively correct to lower both "live-in" and "live-out" + // Note: It is conservatively correct to lower both "live-in" and "live-out" // as "live-through". A "live-through" variable is one which is "live-in", // "live-out", and live throughout the lifetime of the call (i.e. we can find // it from any PC within the transitive callee of the statepoint). 
In @@ -715,7 +715,8 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( assert((GFI || SI.Bases.empty()) && "No gc specified, so cannot relocate pointers!"); - LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); + LLVM_DEBUG(if (SI.StatepointInstr) dbgs() + << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG for (const auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) @@ -1032,10 +1033,16 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, ActualCallee = Callee; } + const auto GCResultLocality = getGCResultLocality(I); + AttributeSet retAttrs; + if (GCResultLocality.first) + retAttrs = GCResultLocality.first->getAttributes().getRetAttrs(); + StatepointLoweringInfo SI(DAG); populateCallLoweringInfo(SI.CLI, &I, GCStatepointInst::CallArgsBeginPos, I.getNumCallArgs(), ActualCallee, - I.getActualReturnType(), false /* IsPatchPoint */); + I.getActualReturnType(), retAttrs, + /*IsPatchPoint=*/false); // There may be duplication in the gc.relocate list; such as two copies of // each relocation on normal and exceptional path for an invoke. We only @@ -1091,8 +1098,6 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, SDValue ReturnValue = LowerAsSTATEPOINT(SI); // Export the result value if needed - const auto GCResultLocality = getGCResultLocality(I); - if (!GCResultLocality.first && !GCResultLocality.second) { // The return value is not needed, just generate a poison value. // Note: This covers the void return case. @@ -1137,7 +1142,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( populateCallLoweringInfo( SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee, ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(), - false); + Call->getAttributes().getRetAttrs(), /*IsPatchPoint=*/false); if (!VarArgDisallowed) SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg(); @@ -1156,6 +1161,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( // NB! The GC arguments are deliberately left empty. + LLVM_DEBUG(dbgs() << "Lowering call with deopt bundle " << *Call << "\n"); if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) { ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal); setValue(Call, ReturnVal); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index a84d35a6ea4e..c5977546828f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -472,6 +473,17 @@ TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx); } +SDValue TargetLowering::expandIndirectJTBranch(const SDLoc &dl, SDValue Value, + SDValue Addr, int JTI, + SelectionDAG &DAG) const { + SDValue Chain = Value; + // Jump table debug info is only needed if CodeView is enabled. 
+ if (DAG.getTarget().getTargetTriple().isOSBinFormatCOFF()) { + Chain = DAG.getJumpTableDebugInfo(JTI, Chain, dl); + } + return DAG.getNode(ISD::BRIND, dl, MVT::Other, Chain, Addr); +} + bool TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { const TargetMachine &TM = getTargetMachine(); @@ -554,8 +566,9 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, } /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. -/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be -/// generalized for targets with other types of implicit widening casts. +/// This uses isTruncateFree/isZExtFree and ANY_EXTEND for the widening cast, +/// but it could be generalized for targets with other types of implicit +/// widening casts. bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &DemandedBits, TargetLoweringOpt &TLO) const { @@ -1040,13 +1053,10 @@ static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, // larger type size to do the transform. if (!TLI.isOperationLegalOrCustom(AVGOpc, VT)) return SDValue(); - - if (DAG.computeOverflowForAdd(IsSigned, Add.getOperand(0), - Add.getOperand(1)) == - SelectionDAG::OFK_Never && - (!Add2 || DAG.computeOverflowForAdd(IsSigned, Add2.getOperand(0), - Add2.getOperand(1)) == - SelectionDAG::OFK_Never)) + if (DAG.willNotOverflowAdd(IsSigned, Add.getOperand(0), + Add.getOperand(1)) && + (!Add2 || DAG.willNotOverflowAdd(IsSigned, Add2.getOperand(0), + Add2.getOperand(1)))) NVT = VT; else return SDValue(); @@ -1155,6 +1165,18 @@ bool TargetLowering::SimplifyDemandedBits( // TODO: Call SimplifyDemandedBits for non-constant demanded elements. Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); return false; // Don't fall through, will infinitely loop. + case ISD::SPLAT_VECTOR: { + SDValue Scl = Op.getOperand(0); + APInt DemandedSclBits = DemandedBits.zextOrTrunc(Scl.getValueSizeInBits()); + KnownBits KnownScl; + if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) + return true; + + // Implicitly truncate the bits to match the official semantics of + // SPLAT_VECTOR. + Known = KnownScl.trunc(BitWidth); + break; + } case ISD::LOAD: { auto *LD = cast<LoadSDNode>(Op); if (getTargetConstantFromLoad(LD)) { @@ -1765,8 +1787,17 @@ bool TargetLowering::SimplifyDemandedBits( APInt InDemandedMask = DemandedBits.lshr(ShAmt); if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, - Depth + 1)) + Depth + 1)) { + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op->setFlags(Flags); + } return true; + } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero <<= ShAmt; Known.One <<= ShAmt; @@ -1788,6 +1819,37 @@ bool TargetLowering::SimplifyDemandedBits( if ((ShAmt < DemandedBits.getActiveBits()) && ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; + + // Narrow shift to lower half - similar to ShrinkDemandedOp. + // (shl i64:x, K) -> (i64 zero_extend (shl (i32 (trunc i64:x)), K)) + // Only do this if we demand the upper half so the knownbits are correct. 
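The identity in the comment above can be sanity-checked by brute force at a smaller width before reading the code that follows: whenever the upper half of the wide shift result is zero (the "narrow shift is NUW" condition the code verifies via known bits), the wide shift equals the zero-extended narrow shift. Illustrative check, independent of LLVM:

```cpp
#include <cassert>
#include <cstdint>

// Scaled down from i64/i32 to i16/i8 so all inputs can be enumerated.
int main() {
  for (unsigned K = 0; K < 8; ++K) {
    for (uint32_t X = 0; X <= 0xFFFF; ++X) {
      uint16_t Wide = static_cast<uint16_t>(X << K); // i16 shl, wraps
      if ((Wide >> 8) != 0)
        continue; // upper half not known zero: the transform does not apply
      uint8_t Narrow = static_cast<uint8_t>(static_cast<uint8_t>(X) << K);
      assert(Wide == static_cast<uint16_t>(Narrow)); // zext(narrow) == wide
    }
  }
}
```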
+ unsigned HalfWidth = BitWidth / 2; + if ((BitWidth % 2) == 0 && !VT.isVector() && ShAmt < HalfWidth && + DemandedBits.countLeadingOnes() >= HalfWidth) { + EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), HalfWidth); + if (isNarrowingProfitable(VT, HalfVT) && + isTypeDesirableForOp(ISD::SHL, HalfVT) && + isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) && + (!TLO.LegalOperations() || isOperationLegal(ISD::SHL, HalfVT))) { + // If we're demanding the upper bits at all, we must ensure + // that the upper bits of the shift result are known to be zero, + // which is equivalent to the narrow shift being NUW. + if (bool IsNUW = (Known.countMinLeadingZeros() >= HalfWidth)) { + bool IsNSW = Known.countMinSignBits() > HalfWidth; + SDNodeFlags Flags; + Flags.setNoSignedWrap(IsNSW); + Flags.setNoUnsignedWrap(IsNUW); + SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0); + SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant( + ShAmt, HalfVT, dl, TLO.LegalTypes()); + SDValue NewShift = TLO.DAG.getNode(ISD::SHL, dl, HalfVT, NewOp, + NewShiftAmt, Flags); + SDValue NewExt = + TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, NewShift); + return TLO.CombineTo(Op, NewExt); + } + } + } } else { // This is a variable shift, so we can't shift the demand mask by a known // amount. But if we are not demanding high bits, then we are not @@ -1870,15 +1932,15 @@ bool TargetLowering::SimplifyDemandedBits( // Narrow shift to lower half - similar to ShrinkDemandedOp. // (srl i64:x, K) -> (i64 zero_extend (srl (i32 (trunc i64:x)), K)) - if ((BitWidth % 2) == 0 && !VT.isVector() && - ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) || - TLO.DAG.MaskedValueIsZero( - Op0, APInt::getHighBitsSet(BitWidth, BitWidth / 2)))) { + if ((BitWidth % 2) == 0 && !VT.isVector()) { + APInt HiBits = APInt::getHighBitsSet(BitWidth, BitWidth / 2); EVT HalfVT = EVT::getIntegerVT(*TLO.DAG.getContext(), BitWidth / 2); if (isNarrowingProfitable(VT, HalfVT) && isTypeDesirableForOp(ISD::SRL, HalfVT) && isTruncateFree(VT, HalfVT) && isZExtFree(HalfVT, VT) && - (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT))) { + (!TLO.LegalOperations() || isOperationLegal(ISD::SRL, HalfVT)) && + ((InDemandedMask.countLeadingZeros() >= (BitWidth / 2)) || + TLO.DAG.MaskedValueIsZero(Op0, HiBits))) { SDValue NewOp = TLO.DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Op0); SDValue NewShiftAmt = TLO.DAG.getShiftAmountConstant( ShAmt, HalfVT, dl, TLO.LegalTypes()); @@ -1945,6 +2007,35 @@ bool TargetLowering::SimplifyDemandedBits( if (ShAmt == 0) return TLO.CombineTo(Op, Op0); + // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target + // supports sext_inreg. + if (Op0.getOpcode() == ISD::SHL) { + if (const APInt *InnerSA = + TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) { + unsigned LowBits = BitWidth - ShAmt; + EVT ExtVT = EVT::getIntegerVT(*TLO.DAG.getContext(), LowBits); + if (VT.isVector()) + ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtVT, + VT.getVectorElementCount()); + + if (*InnerSA == ShAmt) { + if (!TLO.LegalOperations() || + getOperationAction(ISD::SIGN_EXTEND_INREG, ExtVT) == Legal) + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, + Op0.getOperand(0), + TLO.DAG.getValueType(ExtVT))); + + // Even if we can't convert to sext_inreg, we might be able to + // remove this shift pair if the input is already sign extended. 
+ unsigned NumSignBits = + TLO.DAG.ComputeNumSignBits(Op0.getOperand(0), DemandedElts); + if (NumSignBits > ShAmt) + return TLO.CombineTo(Op, Op0.getOperand(0)); + } + } + } + APInt InDemandedMask = (DemandedBits << ShAmt); // If the shift is exact, then it does demand the low bits (and knows that @@ -2106,30 +2197,57 @@ bool TargetLowering::SimplifyDemandedBits( } break; } - case ISD::UMIN: { - // Check if one arg is always less than (or equal) to the other arg. - SDValue Op0 = Op.getOperand(0); - SDValue Op1 = Op.getOperand(1); - KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); - KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); - Known = KnownBits::umin(Known0, Known1); - if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1)) - return TLO.CombineTo(Op, *IsULE ? Op0 : Op1); - if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1)) - return TLO.CombineTo(Op, *IsULT ? Op0 : Op1); - break; - } + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: case ISD::UMAX: { - // Check if one arg is always greater than (or equal) to the other arg. + unsigned Opc = Op.getOpcode(); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + + // If we're only demanding signbits, then we can simplify to OR/AND node. + unsigned BitOp = + (Opc == ISD::SMIN || Opc == ISD::UMAX) ? ISD::OR : ISD::AND; + unsigned NumSignBits = + std::min(TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1), + TLO.DAG.ComputeNumSignBits(Op1, DemandedElts, Depth + 1)); + unsigned NumDemandedUpperBits = BitWidth - DemandedBits.countr_zero(); + if (NumSignBits >= NumDemandedUpperBits) + return TLO.CombineTo(Op, TLO.DAG.getNode(BitOp, SDLoc(Op), VT, Op0, Op1)); + + // Check if one arg is always less/greater than (or equal) to the other arg. KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); - Known = KnownBits::umax(Known0, Known1); - if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) - return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1); - if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) - return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1); + switch (Opc) { + case ISD::SMIN: + if (std::optional<bool> IsSLE = KnownBits::sle(Known0, Known1)) + return TLO.CombineTo(Op, *IsSLE ? Op0 : Op1); + if (std::optional<bool> IsSLT = KnownBits::slt(Known0, Known1)) + return TLO.CombineTo(Op, *IsSLT ? Op0 : Op1); + Known = KnownBits::smin(Known0, Known1); + break; + case ISD::SMAX: + if (std::optional<bool> IsSGE = KnownBits::sge(Known0, Known1)) + return TLO.CombineTo(Op, *IsSGE ? Op0 : Op1); + if (std::optional<bool> IsSGT = KnownBits::sgt(Known0, Known1)) + return TLO.CombineTo(Op, *IsSGT ? Op0 : Op1); + Known = KnownBits::smax(Known0, Known1); + break; + case ISD::UMIN: + if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1)) + return TLO.CombineTo(Op, *IsULE ? Op0 : Op1); + if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1)) + return TLO.CombineTo(Op, *IsULT ? Op0 : Op1); + Known = KnownBits::umin(Known0, Known1); + break; + case ISD::UMAX: + if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) + return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1); + if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) + return TLO.CombineTo(Op, *IsUGT ? 
Op0 : Op1); + Known = KnownBits::umax(Known0, Known1); + break; + } break; } case ISD::BITREVERSE: { @@ -2285,11 +2403,17 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); } + SDNodeFlags Flags = Op->getFlags(); APInt InDemandedBits = DemandedBits.trunc(InBits); APInt InDemandedElts = DemandedElts.zext(InElts); if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, - Depth + 1)) + Depth + 1)) { + if (Flags.hasNonNeg()) { + Flags.setNonNeg(false); + Op->setFlags(Flags); + } return true; + } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); Known = Known.zext(BitWidth); @@ -2653,7 +2777,7 @@ bool TargetLowering::SimplifyDemandedBits( // neg x with only low bit demanded is simply x. if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() && - isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero() + isNullConstant(Op0)) return TLO.CombineTo(Op, Op1); // Attempt to avoid multi-use ops if we don't need anything from them. @@ -2913,8 +3037,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts, TLO.DAG, Depth + 1); if (NewOp0 || NewOp1) { - SDValue NewOp = TLO.DAG.getNode( - Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1); + SDValue NewOp = + TLO.DAG.getNode(Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, + NewOp1 ? NewOp1 : Op1, Op->getFlags()); return TLO.CombineTo(Op, NewOp); } return false; @@ -3823,8 +3948,12 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return SDValue(); } + // TODO: We should invert (X & Y) eq/ne 0 -> (X & Y) ne/eq Y if + // `isXAndYEqZeroPreferableToXAndYEqY` is false. This is a bit difficult as + // it's liable to create an infinite loop. SDValue Zero = DAG.getConstant(0, DL, OpVT); - if (DAG.isKnownToBeAPowerOfTwo(Y)) { + if (isXAndYEqZeroPreferableToXAndYEqY(Cond, OpVT) && + DAG.isKnownToBeAPowerOfTwo(Y)) { // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set. // Note that where Y is variable and is known to have at most one bit set // (for example, if it is Z & 1) we cannot do this; the expressions are not @@ -3843,8 +3972,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, // Bail out if the compare operand that we want to turn into a zero is // already a zero (otherwise, infinite loop). - auto *YConst = dyn_cast<ConstantSDNode>(Y); - if (YConst && YConst->isZero()) + if (isNullConstant(Y)) return SDValue(); // Transform this into: ~X & Y == 0. @@ -4088,8 +4216,8 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) { - // Keep the CTPOP if it is a legal vector op. - if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) + // Keep the CTPOP if it is a cheap vector op.
+ if (CTVT.isVector() && TLI.isCtpopFast(CTVT)) return SDValue(); unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond); @@ -4110,28 +4238,32 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC); } - // Expand a power-of-2 comparison based on ctpop: - // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) - // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) + // Expand a power-of-2 comparison based on ctpop if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) { - // Keep the CTPOP if it is legal. - if (TLI.isOperationLegal(ISD::CTPOP, CTVT)) + // Keep the CTPOP if it is cheap. + if (TLI.isCtpopFast(CTVT)) return SDValue(); SDValue Zero = DAG.getConstant(0, dl, CTVT); SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); assert(CTVT.isInteger()); - ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); - SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); + // Its not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so - // check before the emit a potentially unnecessary op. - if (DAG.isKnownNeverZero(CTOp)) + // check before emitting a potentially unnecessary op. + if (DAG.isKnownNeverZero(CTOp)) { + // (ctpop x) == 1 --> (x & x-1) == 0 + // (ctpop x) != 1 --> (x & x-1) != 0 + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); + SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); return RHS; - SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); - unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR; - return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); + } + + // (ctpop x) == 1 --> (x ^ x-1) > x-1 + // (ctpop x) != 1 --> (x ^ x-1) <= x-1 + SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add); + ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE; + return DAG.getSetCC(dl, VT, Xor, Add, CmpCond); } return SDValue(); @@ -4477,8 +4609,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) - Ptr = - DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl); + Ptr = DAG.getMemBasePlusOffset(Ptr, TypeSize::getFixed(bestOffset), + dl); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), @@ -4983,6 +5115,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + // setueq/setoeq X, (fabs Inf) -> is_fpclass X, fcInf + if (isOperationLegalOrCustom(ISD::IS_FPCLASS, N0.getValueType()) && + !isFPImmLegal(CFP->getValueAPF(), CFP->getValueType(0))) { + bool IsFabs = N0.getOpcode() == ISD::FABS; + SDValue Op = IsFabs ? N0.getOperand(0) : N0; + if ((Cond == ISD::SETOEQ || Cond == ISD::SETUEQ) && CFP->isInfinity()) { + FPClassTest Flag = CFP->isNegative() ? (IsFabs ? fcNone : fcNegInf) + : (IsFabs ? fcInf : fcPosInf); + if (Cond == ISD::SETUEQ) + Flag |= fcNan; + return DAG.getNode(ISD::IS_FPCLASS, dl, VT, Op, + DAG.getTargetConstant(Flag, dl, MVT::i32)); + } + } + // If the condition is not legal, see if we can find an equivalent one // which is legal. 
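The new single-compare expansion above relies on the identity popcount(x) == 1 <=> (x ^ (x - 1)) >u (x - 1), with x - 1 computed modulo 2^n (it is built as ADD x, -1 in the DAG). A brute-force check over all 16-bit values; illustrative, not LLVM code:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t I = 0; I <= 0xFFFF; ++I) {
    uint16_t X = static_cast<uint16_t>(I);
    uint16_t Add = static_cast<uint16_t>(X - 1u); // wraps to 0xFFFF at x == 0
    bool IsPow2 = X != 0 && (X & (X - 1u)) == 0;  // reference answer
    bool Rewritten = static_cast<uint16_t>(X ^ Add) > Add;
    assert(IsPow2 == Rewritten);
  }
}
```

For a power of two, x ^ (x - 1) sets every bit up to and including the single set bit, which strictly exceeds x - 1; for zero or multi-bit values it does not.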
if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) { @@ -5037,7 +5184,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (isBitwiseNot(N1)) return DAG.getSetCC(dl, VT, N1.getOperand(0), N0.getOperand(0), Cond); - if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && + !DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(0))) { SDValue Not = DAG.getNOT(dl, N1, OpVT); return DAG.getSetCC(dl, VT, Not, N0.getOperand(0), Cond); } @@ -5297,11 +5445,12 @@ SDValue TargetLowering::LowerAsmOutputForConstraint( /// Lower the specified operand into the Ops vector. /// If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, - std::string &Constraint, + StringRef Constraint, std::vector<SDValue> &Ops, SelectionDAG &DAG) const { - if (Constraint.length() > 1) return; + if (Constraint.size() > 1) + return; char ConstraintLetter = Constraint[0]; switch (ConstraintLetter) { @@ -5620,20 +5769,27 @@ TargetLowering::ParseConstraints(const DataLayout &DL, return ConstraintOperands; } -/// Return an integer indicating how general CT is. -static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { +/// Return a number indicating our preference for choosing a type of constraint +/// over another, for the purpose of sorting them. Immediates are almost always +/// preferable (when they can be emitted). A higher return value means a +/// stronger preference for one constraint type relative to another. +/// FIXME: We should prefer registers over memory but doing so may lead to +/// unrecoverable register exhaustion later. +/// https://github.com/llvm/llvm-project/issues/20571 +static unsigned getConstraintPiority(TargetLowering::ConstraintType CT) { switch (CT) { case TargetLowering::C_Immediate: case TargetLowering::C_Other: - case TargetLowering::C_Unknown: - return 0; - case TargetLowering::C_Register: - return 1; - case TargetLowering::C_RegisterClass: - return 2; + return 4; case TargetLowering::C_Memory: case TargetLowering::C_Address: return 3; + case TargetLowering::C_RegisterClass: + return 2; + case TargetLowering::C_Register: + return 1; + case TargetLowering::C_Unknown: + return 0; } llvm_unreachable("Invalid constraint type"); } @@ -5713,11 +5869,15 @@ TargetLowering::ConstraintWeight /// If there are multiple different constraints that we could pick for this /// operand (e.g. "imr") try to pick the 'best' one. -/// This is somewhat tricky: constraints fall into four classes: -/// Other -> immediates and magic values +/// This is somewhat tricky: constraints (TargetLowering::ConstraintType) fall +/// into seven classes: /// Register -> one specific register /// RegisterClass -> a group of regs /// Memory -> memory +/// Address -> a symbolic memory reference +/// Immediate -> immediate values +/// Other -> magic values (such as "Flag Output Operands") +/// Unknown -> something we don't recognize yet and can't handle /// Ideally, we would pick the most specific constraint possible: if we have /// something that fits into a register, we would pick it. The problem here /// is that if we have something that could either be in a register or in @@ -5731,18 +5891,13 @@ TargetLowering::ConstraintWeight /// 2) Otherwise, pick the most general constraint present. This prefers /// 'm' over 'r', for example.
/// -static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, - const TargetLowering &TLI, - SDValue Op, SelectionDAG *DAG) { - assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); - unsigned BestIdx = 0; - TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; - int BestGenerality = -1; +TargetLowering::ConstraintGroup TargetLowering::getConstraintPreferences( + TargetLowering::AsmOperandInfo &OpInfo) const { + ConstraintGroup Ret; - // Loop over the options, keeping track of the most general one. - for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { - TargetLowering::ConstraintType CType = - TLI.getConstraintType(OpInfo.Codes[i]); + Ret.reserve(OpInfo.Codes.size()); + for (StringRef Code : OpInfo.Codes) { + TargetLowering::ConstraintType CType = getConstraintType(Code); // Indirect 'other' or 'immediate' constraints are not allowed. if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory || @@ -5750,40 +5905,38 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, CType == TargetLowering::C_RegisterClass)) continue; - // If this is an 'other' or 'immediate' constraint, see if the operand is - // valid for it. For example, on X86 we might have an 'rI' constraint. If - // the operand is an integer in the range [0..31] we want to use I (saving a - // load of a register), otherwise we must use 'r'. - if ((CType == TargetLowering::C_Other || - CType == TargetLowering::C_Immediate) && Op.getNode()) { - assert(OpInfo.Codes[i].size() == 1 && - "Unhandled multi-letter 'other' constraint"); - std::vector<SDValue> ResultOps; - TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i], - ResultOps, *DAG); - if (!ResultOps.empty()) { - BestType = CType; - BestIdx = i; - break; - } - } - // Things with matching constraints can only be registers, per gcc // documentation. This mainly affects "g" constraints. if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) continue; - // This constraint letter is more general than the previous one, use it. - int Generality = getConstraintGenerality(CType); - if (Generality > BestGenerality) { - BestType = CType; - BestIdx = i; - BestGenerality = Generality; - } + Ret.emplace_back(Code, CType); } - OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; - OpInfo.ConstraintType = BestType; + std::stable_sort( + Ret.begin(), Ret.end(), [](ConstraintPair a, ConstraintPair b) { + return getConstraintPiority(a.second) > getConstraintPiority(b.second); + }); + + return Ret; +} + +/// If we have an immediate, see if we can lower it. Return true if we can, +/// false otherwise. 
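getConstraintPreferences above collects every usable constraint code and stable-sorts by the priority helper, so equal-priority codes keep their written order and the caller can walk the list front to back. A sketch of that scheme, with illustrative constraint letters and priorities mirroring the comparator above:

```cpp
#include <algorithm>
#include <cassert>
#include <string>
#include <utility>
#include <vector>

enum class CType { Register, RegisterClass, Memory, Immediate };

// Immediate > memory > register class > register, as in the code above.
static unsigned priority(CType T) {
  switch (T) {
  case CType::Immediate:     return 4;
  case CType::Memory:        return 3;
  case CType::RegisterClass: return 2;
  case CType::Register:      return 1;
  }
  return 0;
}

int main() {
  using Pair = std::pair<std::string, CType>;
  // "imr": immediate, memory, register class, in source order.
  std::vector<Pair> G = {{"i", CType::Immediate},
                         {"m", CType::Memory},
                         {"r", CType::RegisterClass}};
  std::stable_sort(G.begin(), G.end(), [](const Pair &A, const Pair &B) {
    return priority(A.second) > priority(B.second);
  });
  assert(G.front().first == "i" && G.back().first == "r");
}
```

stable_sort rather than sort matters here: ties are resolved by the order the constraints were written, matching the old first-match behavior.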
+
+/// If we have an immediate, see if we can lower it. Return true if we can,
+/// false otherwise.
+static bool lowerImmediateIfPossible(TargetLowering::ConstraintPair &P,
+                                     SDValue Op, SelectionDAG *DAG,
+                                     const TargetLowering &TLI) {
+
+  assert((P.second == TargetLowering::C_Other ||
+          P.second == TargetLowering::C_Immediate) &&
+         "need immediate or other");
+
+  if (!Op.getNode())
+    return false;
+
+  std::vector<SDValue> ResultOps;
+  TLI.LowerAsmOperandForConstraint(Op, P.first, ResultOps, *DAG);
+  return !ResultOps.empty();
 }
 
 /// Determines the constraint code and constraint type to use for the specific
@@ -5798,7 +5951,26 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
     OpInfo.ConstraintCode = OpInfo.Codes[0];
     OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
   } else {
-    ChooseConstraint(OpInfo, *this, Op, DAG);
+    ConstraintGroup G = getConstraintPreferences(OpInfo);
+    if (G.empty())
+      return;
+
+    unsigned BestIdx = 0;
+    for (const unsigned E = G.size();
+         BestIdx < E && (G[BestIdx].second == TargetLowering::C_Other ||
+                         G[BestIdx].second == TargetLowering::C_Immediate);
+         ++BestIdx) {
+      if (lowerImmediateIfPossible(G[BestIdx], Op, DAG, *this))
+        break;
+      // If we're out of constraints, just pick the first one.
+      if (BestIdx + 1 == E) {
+        BestIdx = 0;
+        break;
+      }
+    }
+
+    OpInfo.ConstraintCode = G[BestIdx].first;
+    OpInfo.ConstraintType = G[BestIdx].second;
   }
 
   // 'X' matches anything.
@@ -5914,6 +6086,49 @@ TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor,
   return SDValue();
 }
 
+/// Build sdiv by power-of-2 with conditional move instructions
+/// Ref: "Hacker's Delight" by Henry Warren 10-1
+/// If conditional move/branch is preferred, we lower sdiv x, +/-2**k into:
+///   bgez x, label
+///   add x, x, 2**k-1
+/// label:
+///   sra res, x, k
+///   neg res, res (when the divisor is negative)
+SDValue TargetLowering::buildSDIVPow2WithCMov(
+    SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
+    SmallVectorImpl<SDNode *> &Created) const {
+  unsigned Lg2 = Divisor.countr_zero();
+  EVT VT = N->getValueType(0);
+
+  SDLoc DL(N);
+  SDValue N0 = N->getOperand(0);
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
+  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);
+
+  // If N0 is negative, we need to add (Pow2 - 1) to it before shifting right.
+  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+  SDValue Cmp = DAG.getSetCC(DL, CCVT, N0, Zero, ISD::SETLT);
+  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
+  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);
+
+  Created.push_back(Cmp.getNode());
+  Created.push_back(Add.getNode());
+  Created.push_back(CMov.getNode());
+
+  // Divide by pow2.
+  SDValue SRA =
+      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, VT));
+
+  // If we're dividing by a positive value, we're done. Otherwise, we must
+  // negate the result.
+  if (Divisor.isNonNegative())
+    return SRA;
+
+  Created.push_back(SRA.getNode());
+  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
+}
+
 /// Given an ISD::SDIV node expressing a divide by constant,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.
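The DAG sequence built above has a simple scalar model. In the sketch below (sdivPow2 is a hypothetical helper; it assumes arithmetic right shift on signed values, guaranteed since C++20, and k in [1, 30]), a plain x >> k would round toward negative infinity, so negative dividends are first biased by 2**k - 1 to obtain the round-toward-zero semantics of signed division, and the quotient is negated when the divisor was -2**k:

#include <cassert>
#include <cstdint>

int32_t sdivPow2(int32_t x, unsigned k, bool negativeDivisor) {
  int32_t biased = x < 0 ? x + ((int32_t(1) << k) - 1) : x; // the SELECT/CMov
  int32_t q = biased >> k;                                  // the SRA
  return negativeDivisor ? -q : q;                          // the SUB 0, res
}

int main() {
  assert(sdivPow2(-7, 1, false) == -7 / 2);  // -3, not -4
  assert(sdivPow2(7, 3, true) == 7 / -8);    // 0
  assert(sdivPow2(-1, 5, false) == -1 / 32); // 0
  return 0;
}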
@@ -6016,7 +6231,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
   // Multiply the numerator (operand 0) by the magic value.
   // FIXME: We should support doing a MUL in a wider type.
   auto GetMULHS = [&](SDValue X, SDValue Y) {
-    // If the type isn't legal, use a wider mul of the the type calculated
+    // If the type isn't legal, use a wider mul of the type calculated
     // earlier.
     if (!isTypeLegal(VT)) {
       X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
@@ -6203,7 +6418,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
   // FIXME: We should support doing a MUL in a wider type.
   auto GetMULHU = [&](SDValue X, SDValue Y) {
-    // If the type isn't legal, use a wider mul of the the type calculated
+    // If the type isn't legal, use a wider mul of the type calculated
     // earlier.
     if (!isTypeLegal(VT)) {
       X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
@@ -9131,7 +9346,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                      SrcEltVT, LD->getOriginalAlign(),
                      LD->getMemOperand()->getFlags(), LD->getAAInfo());
 
-    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));
+    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::getFixed(Stride));
 
     Vals.push_back(ScalarLoad.getValue(0));
     LoadChains.push_back(ScalarLoad.getValue(1));
@@ -9206,7 +9421,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                   DAG.getVectorIdxConstant(Idx, SL));
 
     SDValue Ptr =
-        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));
+        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Idx * Stride));
 
     // This scalar TruncStore may be illegal, but we legalize it later.
     SDValue Store = DAG.getTruncStore(
@@ -9342,7 +9557,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
                         NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                         LD->getAAInfo());
 
-    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                         LD->getPointerInfo().getWithOffset(IncrementSize),
                         NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
@@ -9352,7 +9567,7 @@ SDValue TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
                         NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                         LD->getAAInfo());
 
-    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                         LD->getPointerInfo().getWithOffset(IncrementSize),
                         NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
@@ -9477,6 +9692,14 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
   SDValue ShiftAmount = DAG.getConstant(
       NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
   SDValue Lo = Val;
+  // If Val is a constant, replace the upper bits with 0. The SRL will constant
+  // fold and not use the upper bits. A smaller constant may be easier to
+  // materialize.
+  if (auto *C = dyn_cast<ConstantSDNode>(Lo); C && !C->isOpaque())
+    Lo = DAG.getNode(
+        ISD::AND, dl, VT, Lo,
+        DAG.getConstant(APInt::getLowBitsSet(VT.getSizeInBits(), NumBits), dl,
+                        VT));
   SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
 
   // Store the two parts
@@ -9486,7 +9709,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                              Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                              ST->getMemOperand()->getFlags());
 
-  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
+  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::getFixed(IncrementSize));
   Store2 = DAG.getTruncStore(
       Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
       ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
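The expandUnalignedStore change above only touches how the low half is formed. A scalar model of the split (splitStore32 is a hypothetical helper; a little-endian host is assumed) makes the point of the new AND visible: the truncating low-half store keeps only the bottom NumBits bits either way, but once the upper bits of a constant are zeroed, a small value like 0x5678 may be cheaper to materialize than the original 0x12345678:

#include <cassert>
#include <cstdint>
#include <cstring>

static void splitStore32(unsigned char *P, uint32_t Val) {
  uint16_t Lo = uint16_t(Val & 0xFFFFu); // upper bits replaced with 0
  uint16_t Hi = uint16_t(Val >> 16);     // the SRL by NumBits
  std::memcpy(P + 0, &Lo, sizeof(Lo));   // Store1
  std::memcpy(P + 2, &Hi, sizeof(Hi));   // Store2 at Ptr + IncrementSize
}

int main() {
  unsigned char Buf[8] = {0};
  uint32_t V = 0x12345678u;
  splitStore32(Buf + 1, V); // deliberately misaligned destination
  uint32_t Out;
  std::memcpy(&Out, Buf + 1, sizeof(Out));
  assert(Out == V); // the two halves reassemble the original value
  return 0;
}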
@@ -9618,7 +9841,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
   // Access to address of TLS varialbe xyz is lowered to a function call:
   //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
+  PointerType *VoidPtrType = PointerType::get(*DAG.getContext(), 0);
   SDLoc dl(GA);
 
   ArgListTy Args;
@@ -9657,20 +9880,18 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
     return SDValue();
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
   SDLoc dl(Op);
-  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
-    if (C->isZero() && CC == ISD::SETEQ) {
-      EVT VT = Op.getOperand(0).getValueType();
-      SDValue Zext = Op.getOperand(0);
-      if (VT.bitsLT(MVT::i32)) {
-        VT = MVT::i32;
-        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
-      }
-      unsigned Log2b = Log2_32(VT.getSizeInBits());
-      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
-      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
-                                DAG.getConstant(Log2b, dl, MVT::i32));
-      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+  if (isNullConstant(Op.getOperand(1)) && CC == ISD::SETEQ) {
+    EVT VT = Op.getOperand(0).getValueType();
+    SDValue Zext = Op.getOperand(0);
+    if (VT.bitsLT(MVT::i32)) {
+      VT = MVT::i32;
+      Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
     }
+    unsigned Log2b = Log2_32(VT.getSizeInBits());
+    SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+    SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+                              DAG.getConstant(Log2b, dl, MVT::i32));
+    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
   }
   return SDValue();
 }
@@ -10489,9 +10710,9 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
     MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth);
   }
 
-  // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
+  // We cannot risk emitting FP_TO_XINT nodes with a source VT of [b]f16, as
   // libcall emission cannot handle this. Large result types will fail.
-  if (SrcVT == MVT::f16) {
+  if (SrcVT == MVT::f16 || SrcVT == MVT::bf16) {
     Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
     SrcVT = Src.getValueType();
   }
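The lowering in lowerCmpEqZeroToCtlzSrl rests on a small bit trick: for an N-bit value, ctlz(x) equals N only when x == 0, and N is the only possible count with its log2(N) bit set, so shifting the count right by log2(N) yields exactly the boolean "x is zero". A scalar model (isZeroViaClz is a hypothetical helper; std::countl_zero requires C++20):

#include <bit>
#include <cassert>
#include <cstdint>

static uint32_t isZeroViaClz(uint32_t X) {
  // countl_zero(0) == 32, and 32 is the only count with bit 5 set,
  // so shifting by Log2_32(32) == 5 extracts the "is zero" bit.
  return uint32_t(std::countl_zero(X)) >> 5;
}

int main() {
  assert(isZeroViaClz(0) == 1);
  assert(isZeroViaClz(1) == 0);
  assert(isZeroViaClz(0x80000000u) == 0);
  return 0;
}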
