diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 327 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 12 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 18 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 140 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 5 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 24 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 131 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 128 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 95 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 42 | ||||
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 114 |
14 files changed, 739 insertions, 303 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2654c00929d8..edb0756e8c3b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1868,8 +1868,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // If N is a commutative binary node, try to eliminate it if the commuted // version is already present in the DAG. - if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) && - N->getNumValues() == 1) { + if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode())) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4159,6 +4158,10 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (SDValue RMUL = reassociateOps(ISD::MUL, DL, N0, N1, N->getFlags())) return RMUL; + // Simplify the operands using demanded-bits information. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -5978,44 +5981,64 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { if (!TLI.isTypeLegal(VT)) return SDValue(); - // Look through an optional extension and find a 'not'. - // TODO: Should we favor test+set even without the 'not' op? - SDValue Not = And->getOperand(0), And1 = And->getOperand(1); - if (Not.getOpcode() == ISD::ANY_EXTEND) - Not = Not.getOperand(0); - if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1)) + // Look through an optional extension. + SDValue And0 = And->getOperand(0), And1 = And->getOperand(1); + if (And0.getOpcode() == ISD::ANY_EXTEND && And0.hasOneUse()) + And0 = And0.getOperand(0); + if (!isOneConstant(And1) || !And0.hasOneUse()) return SDValue(); - // Look though an optional truncation. The source operand may not be the same - // type as the original 'and', but that is ok because we are masking off - // everything but the low bit. - SDValue Srl = Not.getOperand(0); - if (Srl.getOpcode() == ISD::TRUNCATE) - Srl = Srl.getOperand(0); + SDValue Src = And0; + + // Attempt to find a 'not' op. + // TODO: Should we favor test+set even without the 'not' op? + bool FoundNot = false; + if (isBitwiseNot(Src)) { + FoundNot = true; + Src = Src.getOperand(0); + + // Look though an optional truncation. The source operand may not be the + // same type as the original 'and', but that is ok because we are masking + // off everything but the low bit. + if (Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse()) + Src = Src.getOperand(0); + } // Match a shift-right by constant. - if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() || - !isa<ConstantSDNode>(Srl.getOperand(1))) + if (Src.getOpcode() != ISD::SRL || !Src.hasOneUse()) return SDValue(); // We might have looked through casts that make this transform invalid. // TODO: If the source type is wider than the result type, do the mask and // compare in the source type. - const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1); - unsigned VTBitWidth = VT.getSizeInBits(); - if (ShiftAmt.uge(VTBitWidth)) + unsigned VTBitWidth = VT.getScalarSizeInBits(); + SDValue ShiftAmt = Src.getOperand(1); + auto *ShiftAmtC = dyn_cast<ConstantSDNode>(ShiftAmt); + if (!ShiftAmtC || !ShiftAmtC->getAPIntValue().ult(VTBitWidth)) return SDValue(); - if (!TLI.hasBitTest(Srl.getOperand(0), Srl.getOperand(1))) + // Set source to shift source. + Src = Src.getOperand(0); + + // Try again to find a 'not' op. + // TODO: Should we favor test+set even with two 'not' ops? + if (!FoundNot) { + if (!isBitwiseNot(Src)) + return SDValue(); + Src = Src.getOperand(0); + } + + if (!TLI.hasBitTest(Src, ShiftAmt)) return SDValue(); // Turn this into a bit-test pattern using mask op + setcc: // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0 + // and (srl (not X), C)), 1 --> (and X, 1<<C) == 0 SDLoc DL(And); - SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT); + SDValue X = DAG.getZExtOrTrunc(Src, DL, VT); EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Mask = DAG.getConstant( - APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT); + APInt::getOneBitSet(VTBitWidth, ShiftAmtC->getZExtValue()), DL, VT); SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask); SDValue Zero = DAG.getConstant(0, DL, VT); SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ); @@ -6229,7 +6252,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. APInt Constant = APInt::getZero(1); - if (const ConstantSDNode *C = isConstOrConstSplat(N1)) { + if (const ConstantSDNode *C = isConstOrConstSplat( + N1, /*AllowUndef=*/false, /*AllowTruncation=*/true)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { APInt SplatValue, SplatUndef; @@ -6339,18 +6363,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) - // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) - if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD || - (N0.getOpcode() == ISD::ANY_EXTEND && - N0.getOperand(0).getOpcode() == ISD::LOAD))) { - if (SDValue Res = reduceLoadWidth(N)) { - LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND - ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0); - AddToWorklist(N); - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res); - return SDValue(N, 0); - } - } + if (N1C && N0.getOpcode() == ISD::LOAD && !VT.isVector()) + if (SDValue Res = reduceLoadWidth(N)) + return Res; if (LegalTypes) { // Attempt to propagate the AND back up to the leaves which, if they're @@ -6856,20 +6871,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) { } /// OR combines for which the commuted variant will be tried as well. -static SDValue visitORCommutative( - SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) { +static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, + SDNode *N) { EVT VT = N0.getValueType(); if (N0.getOpcode() == ISD::AND) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) // TODO: Set AllowUndefs = true. - if (getBitwiseNotOperand(N0.getOperand(1), N0.getOperand(0), + if (getBitwiseNotOperand(N01, N00, /* AllowUndefs */ false) == N1) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1); // fold (or (and (xor Y, -1), X), Y) -> (or X, Y) - if (getBitwiseNotOperand(N0.getOperand(0), N0.getOperand(1), + if (getBitwiseNotOperand(N00, N01, /* AllowUndefs */ false) == N1) - return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); } if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) @@ -7915,7 +7933,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; Optional<BaseIndexOffset> Base; - for (auto Store : Stores) { + for (auto *Store : Stores) { // All the stores store different parts of the CombinedValue. A truncate is // required to get the partial value. SDValue Trunc = Store->getValue(); @@ -8488,28 +8506,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return DAG.getNode(ISD::AND, DL, VT, NotX, N1); } - if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) { - ConstantSDNode *XorC = isConstOrConstSplat(N1); - ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1)); - unsigned BitWidth = VT.getScalarSizeInBits(); - if (XorC && ShiftC) { - // Don't crash on an oversized shift. We can not guarantee that a bogus - // shift has been simplified to undef. - uint64_t ShiftAmt = ShiftC->getLimitedValue(); - if (ShiftAmt < BitWidth) { - APInt Ones = APInt::getAllOnes(BitWidth); - Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt); - if (XorC->getAPIntValue() == Ones) { - // If the xor constant is a shifted -1, do a 'not' before the shift: - // xor (X << ShiftC), XorC --> (not X) << ShiftC - // xor (X >> ShiftC), XorC --> (not X) >> ShiftC - SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT); - return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1)); - } - } - } - } - // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X) if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) { SDValue A = N0Opcode == ISD::ADD ? N0 : N1; @@ -11817,6 +11813,9 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) { EVT N00VT = N00.getValueType(); SDLoc DL(N); + // Propagate fast-math-flags. + SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // On some architectures (such as SSE/NEON/etc) the SETCC result type is // the same size as the compared operands. Try to optimize sext(setcc()) // if this is the case. @@ -12358,6 +12357,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return V; if (N0.getOpcode() == ISD::SETCC) { + // Propagate fast-math-flags. + SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // Only do this before legalize for now. if (!LegalOperations && VT.isVector() && N0.getValueType().getVectorElementType() == MVT::i1) { @@ -12549,6 +12551,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + // Propagate fast-math-flags. + SelectionDAG::FlagInserter FlagsInserter(DAG, N0->getFlags()); + // For vectors: // aext(setcc) -> vsetcc // aext(setcc) -> truncate(vsetcc) @@ -13155,6 +13160,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0.getOperand(0); } + // Try to narrow a truncate-of-sext_in_reg to the destination type: + // trunc (sign_ext_inreg X, iM) to iN --> sign_ext_inreg (trunc X to iN), iM + if (!LegalTypes && N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + N0.hasOneUse()) { + SDValue X = N0.getOperand(0); + SDValue ExtVal = N0.getOperand(1); + EVT ExtVT = cast<VTSDNode>(ExtVal)->getVT(); + if (ExtVT.bitsLT(VT)) { + SDValue TrX = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, TrX, ExtVal); + } + } + // If this is anyext(trunc), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND)) return SDValue(); @@ -19478,7 +19496,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return Shuf; // Handle <1 x ???> vector insertion special cases. - if (VT.getVectorNumElements() == 1) { + if (NumElts == 1) { // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && InVal.getOperand(0).getValueType() == VT && @@ -19506,80 +19524,77 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } } - // Attempt to fold the insertion into a legal BUILD_VECTOR. + // Attempt to convert an insert_vector_elt chain into a legal build_vector. if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - auto UpdateBuildVector = [&](SmallVectorImpl<SDValue> &Ops) { - assert(Ops.size() == NumElts && "Unexpected vector size"); - - // Insert the element - if (Elt < Ops.size()) { - // All the operands of BUILD_VECTOR must have the same type; - // we enforce that here. - EVT OpVT = Ops[0].getValueType(); - Ops[Elt] = - OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal; + // vXi1 vector - we don't need to recurse. + if (NumElts == 1) + return DAG.getBuildVector(VT, DL, {InVal}); + + // If we haven't already collected the element, insert into the op list. + EVT MaxEltVT = InVal.getValueType(); + auto AddBuildVectorOp = [&](SmallVectorImpl<SDValue> &Ops, SDValue Elt, + unsigned Idx) { + if (!Ops[Idx]) { + Ops[Idx] = Elt; + if (VT.isInteger()) { + EVT EltVT = Elt.getValueType(); + MaxEltVT = MaxEltVT.bitsGE(EltVT) ? MaxEltVT : EltVT; + } } + }; - // Return the new vector + // Ensure all the operands are the same value type, fill any missing + // operands with UNDEF and create the BUILD_VECTOR. + auto CanonicalizeBuildVector = [&](SmallVectorImpl<SDValue> &Ops) { + assert(Ops.size() == NumElts && "Unexpected vector size"); + for (SDValue &Op : Ops) { + if (Op) + Op = VT.isInteger() ? DAG.getAnyExtOrTrunc(Op, DL, MaxEltVT) : Op; + else + Op = DAG.getUNDEF(MaxEltVT); + } return DAG.getBuildVector(VT, DL, Ops); }; - // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially - // be converted to a BUILD_VECTOR). Fill in the Ops vector with the - // vector elements. - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 8> Ops(NumElts, SDValue()); + Ops[Elt] = InVal; - // Do not combine these two vectors if the output vector will not replace - // the input vector. - if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { - Ops.append(InVec->op_begin(), InVec->op_end()); - return UpdateBuildVector(Ops); - } + // Recurse up a INSERT_VECTOR_ELT chain to build a BUILD_VECTOR. + for (SDValue CurVec = InVec; CurVec;) { + // UNDEF - build new BUILD_VECTOR from already inserted operands. + if (CurVec.isUndef()) + return CanonicalizeBuildVector(Ops); - if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && InVec.hasOneUse()) { - Ops.push_back(InVec.getOperand(0)); - Ops.append(NumElts - 1, DAG.getUNDEF(InVec.getOperand(0).getValueType())); - return UpdateBuildVector(Ops); - } + // BUILD_VECTOR - insert unused operands and build new BUILD_VECTOR. + if (CurVec.getOpcode() == ISD::BUILD_VECTOR && CurVec.hasOneUse()) { + for (unsigned I = 0; I != NumElts; ++I) + AddBuildVectorOp(Ops, CurVec.getOperand(I), I); + return CanonicalizeBuildVector(Ops); + } - if (InVec.isUndef()) { - Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType())); - return UpdateBuildVector(Ops); - } + // SCALAR_TO_VECTOR - insert unused scalar and build new BUILD_VECTOR. + if (CurVec.getOpcode() == ISD::SCALAR_TO_VECTOR && CurVec.hasOneUse()) { + AddBuildVectorOp(Ops, CurVec.getOperand(0), 0); + return CanonicalizeBuildVector(Ops); + } - // If we're inserting into the end of a vector as part of an sequence, see - // if we can create a BUILD_VECTOR by following the sequence back up the - // chain. - if (Elt == (NumElts - 1)) { - SmallVector<SDValue> ReverseInsertions; - ReverseInsertions.push_back(InVal); - - EVT MaxEltVT = InVal.getValueType(); - SDValue CurVec = InVec; - for (unsigned I = 1; I != NumElts; ++I) { - if (CurVec.getOpcode() != ISD::INSERT_VECTOR_ELT || !CurVec.hasOneUse()) - break; + // INSERT_VECTOR_ELT - insert operand and continue up the chain. + if (CurVec.getOpcode() == ISD::INSERT_VECTOR_ELT && CurVec.hasOneUse()) + if (auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2))) + if (CurIdx->getAPIntValue().ult(NumElts)) { + unsigned Idx = CurIdx->getZExtValue(); + AddBuildVectorOp(Ops, CurVec.getOperand(1), Idx); - auto *CurIdx = dyn_cast<ConstantSDNode>(CurVec.getOperand(2)); - if (!CurIdx || CurIdx->getAPIntValue() != ((NumElts - 1) - I)) - break; - SDValue CurVal = CurVec.getOperand(1); - ReverseInsertions.push_back(CurVal); - if (VT.isInteger()) { - EVT CurValVT = CurVal.getValueType(); - MaxEltVT = MaxEltVT.bitsGE(CurValVT) ? MaxEltVT : CurValVT; - } - CurVec = CurVec.getOperand(0); - } + // Found entire BUILD_VECTOR. + if (all_of(Ops, [](SDValue Op) { return !!Op; })) + return CanonicalizeBuildVector(Ops); - if (ReverseInsertions.size() == NumElts) { - for (unsigned I = 0; I != NumElts; ++I) { - SDValue Val = ReverseInsertions[(NumElts - 1) - I]; - Val = VT.isInteger() ? DAG.getAnyExtOrTrunc(Val, DL, MaxEltVT) : Val; - Ops.push_back(Val); - } - return DAG.getBuildVector(VT, DL, Ops); - } + CurVec = CurVec->getOperand(0); + continue; + } + + // Failed to find a match in the chain - bail. + break; } } @@ -22643,6 +22658,56 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // If we're not performing a select/blend shuffle, see if we can convert the + // shuffle into a AND node, with all the out-of-lane elements are known zero. + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { + bool IsInLaneMask = true; + ArrayRef<int> Mask = SVN->getMask(); + SmallVector<int, 16> ClearMask(NumElts, -1); + APInt DemandedLHS = APInt::getNullValue(NumElts); + APInt DemandedRHS = APInt::getNullValue(NumElts); + for (int I = 0; I != (int)NumElts; ++I) { + int M = Mask[I]; + if (M < 0) + continue; + ClearMask[I] = M == I ? I : (I + NumElts); + IsInLaneMask &= (M == I) || (M == (int)(I + NumElts)); + if (M != I) { + APInt &Demanded = M < (int)NumElts ? DemandedLHS : DemandedRHS; + Demanded.setBit(M % NumElts); + } + } + // TODO: Should we try to mask with N1 as well? + if (!IsInLaneMask && + (!DemandedLHS.isNullValue() || !DemandedRHS.isNullValue()) && + (DemandedLHS.isNullValue() || + DAG.MaskedVectorIsZero(N0, DemandedLHS)) && + (DemandedRHS.isNullValue() || + DAG.MaskedVectorIsZero(N1, DemandedRHS))) { + SDLoc DL(N); + EVT IntVT = VT.changeVectorElementTypeToInteger(); + EVT IntSVT = VT.getVectorElementType().changeTypeToInteger(); + SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); + SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); + SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT)); + for (int I = 0; I != (int)NumElts; ++I) + if (0 <= Mask[I]) + AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt; + + // See if a clear mask is legal instead of going via + // XformToShuffleWithZero which loses UNDEF mask elements. + if (TLI.isVectorClearMaskLegal(ClearMask, IntVT)) + return DAG.getBitcast( + VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0), + DAG.getConstant(0, DL, IntVT), ClearMask)); + + if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) + return DAG.getBitcast( + VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0), + DAG.getBuildVector(IntVT, DL, AndMask))); + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -23385,10 +23450,14 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, int Index0, Index1; SDValue Src0 = DAG.getSplatSourceVector(N0, Index0); SDValue Src1 = DAG.getSplatSourceVector(N1, Index1); + // Extract element from splat_vector should be free. + // TODO: use DAG.isSplatValue instead? + bool IsBothSplatVector = N0.getOpcode() == ISD::SPLAT_VECTOR && + N1.getOpcode() == ISD::SPLAT_VECTOR; if (!Src0 || !Src1 || Index0 != Index1 || Src0.getValueType().getVectorElementType() != EltVT || Src1.getValueType().getVectorElementType() != EltVT || - !TLI.isExtractVecEltCheap(VT, Index0) || + !(IsBothSplatVector || TLI.isExtractVecEltCheap(VT, Index0)) || !TLI.isOperationLegalOrCustom(Opcode, EltVT)) return SDValue(); @@ -23410,6 +23479,8 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, } // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, ScalarBO); SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); return DAG.getBuildVector(VT, DL, Ops); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8bdc9410d131..56d35dfe8701 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1404,17 +1404,21 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { } SDValue NewLoad; + Align ElementAlignment = + std::min(cast<StoreSDNode>(Ch)->getAlign(), + DAG.getDataLayout().getPrefTypeAlign( + Op.getValueType().getTypeForEVT(*DAG.getContext()))); if (Op.getValueType().isVector()) { StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, Op.getValueType(), Idx); - NewLoad = - DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, MachinePointerInfo()); + NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, + MachinePointerInfo(), ElementAlignment); } else { StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); NewLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, - MachinePointerInfo(), - VecVT.getVectorElementType()); + MachinePointerInfo(), VecVT.getVectorElementType(), + ElementAlignment); } // Replace the chain going out of the store, by the one out of the load. diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 6c136bdfc652..b2df67f45c72 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2918,6 +2918,9 @@ bool DAGTypeLegalizer::SoftPromoteHalfOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = SoftPromoteHalfOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = SoftPromoteHalfOp_PATCHPOINT(N, OpNo); + break; } if (!Res.getNode()) @@ -3059,3 +3062,18 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo) { return SDValue(); // Signal that we replaced the node ourselves. } + +SDValue DAGTypeLegalizer::SoftPromoteHalfOp_PATCHPOINT(SDNode *N, + unsigned OpNo) { + assert(OpNo >= 7); + SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); + SDValue Op = N->getOperand(OpNo); + NewOps[OpNo] = GetSoftPromotedHalf(Op); + SDValue NewNode = + DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), NewOps); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum)); + + return SDValue(); // Signal that we replaced the node ourselves. +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 343722a97c3c..228d4a43ccde 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1727,6 +1727,13 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = PromoteIntOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = PromoteIntOp_PATCHPOINT(N, OpNo); + break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = PromoteIntOp_VP_STRIDED(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -2341,6 +2348,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo) { return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); } +SDValue DAGTypeLegalizer::PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { + assert(OpNo >= 7); + SmallVector<SDValue> NewOps(N->ops().begin(), N->ops().end()); + SDValue Operand = N->getOperand(OpNo); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Operand.getValueType()); + NewOps[OpNo] = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Operand); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { + assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) || + (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4)); + + SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo)); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + //===----------------------------------------------------------------------===// // Integer Result Expansion //===----------------------------------------------------------------------===// @@ -2886,11 +2912,15 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); + Hi = DAG.computeKnownBits(HiOps[2]).isZero() + ? DAG.getNode(ISD::UADDO, dl, VTList, makeArrayRef(HiOps, 2)) + : DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); } else { Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); - Hi = DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); + Hi = DAG.computeKnownBits(HiOps[2]).isZero() + ? DAG.getNode(ISD::USUBO, dl, VTList, makeArrayRef(HiOps, 2)) + : DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); } return; } @@ -4693,6 +4723,13 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STACKMAP: Res = ExpandIntOp_STACKMAP(N, OpNo); break; + case ISD::PATCHPOINT: + Res = ExpandIntOp_PATCHPOINT(N, OpNo); + break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = ExpandIntOp_VP_STRIDED(N, OpNo); + break; } // If the result is null, the sub-method took care of registering results etc. @@ -5108,6 +5145,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { return Swap.getValue(1); } +SDValue DAGTypeLegalizer::ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo) { + assert((N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_LOAD && OpNo == 3) || + (N->getOpcode() == ISD::EXPERIMENTAL_VP_STRIDED_STORE && OpNo == 4)); + + SDValue Hi; // The upper half is dropped out. + SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end()); + GetExpandedInteger(NewOps[OpNo], NewOps[OpNo], Hi); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) { SDLoc dl(N); @@ -5253,21 +5301,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { assert(NOutVT.isVector() && "This type must be promoted to a vector type"); unsigned NumElems = N->getNumOperands(); EVT NOutVTElem = NOutVT.getVectorElementType(); - + TargetLoweringBase::BooleanContent NOutBoolType = TLI.getBooleanContents(NOutVT); + unsigned NOutExtOpc = TargetLowering::getExtendForContent(NOutBoolType); SDLoc dl(N); SmallVector<SDValue, 8> Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op; + SDValue Op = N->getOperand(i); + EVT OpVT = Op.getValueType(); // BUILD_VECTOR integer operand types are allowed to be larger than the // result's element type. This may still be true after the promotion. For // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. - if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) - Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); - else - Op = N->getOperand(i); + if (OpVT.bitsLT(NOutVTElem)) { + unsigned ExtOpc = ISD::ANY_EXTEND; + // Attempt to extend constant bool vectors to match target's BooleanContent. + // While not necessary, this improves chances of the constant correctly + // folding with compare results (e.g. for NOT patterns). + if (OpVT == MVT::i1 && Op.getOpcode() == ISD::Constant) + ExtOpc = NOutExtOpc; + Op = DAG.getNode(ExtOpc, dl, NOutVTElem, Op); + } Ops.push_back(Op); } @@ -5524,30 +5579,67 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo) { assert(OpNo > 1); - SDValue Op = N->getOperand(OpNo); - SDLoc DL = SDLoc(N); + + // FIXME: Non-constant operands are not yet handled: + // - https://github.com/llvm/llvm-project/issues/26431 + // - https://github.com/llvm/llvm-project/issues/55957 + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op); + if (!CN) + return SDValue(); + + // Copy operands before the one being expanded. SmallVector<SDValue> NewOps; + for (unsigned I = 0; I < OpNo; I++) + NewOps.push_back(N->getOperand(I)); + + EVT Ty = Op.getValueType(); + SDLoc DL = SDLoc(N); + if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { + NewOps.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); + } else { + // FIXME: https://github.com/llvm/llvm-project/issues/55609 + return SDValue(); + } + + // Copy remaining operands. + for (unsigned I = OpNo + 1; I < N->getNumOperands(); I++) + NewOps.push_back(N->getOperand(I)); + + SDValue NewNode = DAG.getNode(N->getOpcode(), DL, N->getVTList(), NewOps); + + for (unsigned ResNum = 0; ResNum < N->getNumValues(); ResNum++) + ReplaceValueWith(SDValue(N, ResNum), NewNode.getValue(ResNum)); + + return SDValue(); // Signal that we have replaced the node already. +} + +SDValue DAGTypeLegalizer::ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo) { + assert(OpNo >= 7); + SDValue Op = N->getOperand(OpNo); + + // FIXME: Non-constant operands are not yet handled: + // - https://github.com/llvm/llvm-project/issues/26431 + // - https://github.com/llvm/llvm-project/issues/55957 + ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op); + if (!CN) + return SDValue(); // Copy operands before the one being expanded. + SmallVector<SDValue> NewOps; for (unsigned I = 0; I < OpNo; I++) NewOps.push_back(N->getOperand(I)); - if (Op->getOpcode() == ISD::Constant) { - ConstantSDNode *CN = cast<ConstantSDNode>(Op); - EVT Ty = Op.getValueType(); - if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { - NewOps.push_back( - DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); - } else { - // FIXME: https://github.com/llvm/llvm-project/issues/55609 - return SDValue(); - } + EVT Ty = Op.getValueType(); + SDLoc DL = SDLoc(N); + if (CN->getConstantIntValue()->getValue().getActiveBits() < 64) { + NewOps.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + NewOps.push_back(DAG.getTargetConstant(CN->getZExtValue(), DL, Ty)); } else { - // FIXME: Non-constant operands are not yet handled: - // - https://github.com/llvm/llvm-project/issues/26431 - // - https://github.com/llvm/llvm-project/issues/55957 + // FIXME: https://github.com/llvm/llvm-project/issues/55609 return SDValue(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 2807b7f5ae68..6696b79cf885 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -403,6 +403,8 @@ private: SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); SDValue PromoteIntOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -495,6 +497,8 @@ private: SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); SDValue ExpandIntOp_SPLAT_VECTOR(SDNode *N); SDValue ExpandIntOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_PATCHPOINT(SDNode *N, unsigned OpNo); + SDValue ExpandIntOp_VP_STRIDED(SDNode *N, unsigned OpNo); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &dl); @@ -744,6 +748,7 @@ private: SDValue SoftPromoteHalfOp_SELECT_CC(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STORE(SDNode *N, unsigned OpNo); SDValue SoftPromoteHalfOp_STACKMAP(SDNode *N, unsigned OpNo); + SDValue SoftPromoteHalfOp_PATCHPOINT(SDNode *N, unsigned OpNo); //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 842ffa2aa23e..f5a1eae1e7fe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -737,6 +737,20 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::SELECT: Results.push_back(ExpandSELECT(Node)); return; + case ISD::SELECT_CC: { + if (Node->getValueType(0).isScalableVector()) { + EVT CondVT = TLI.getSetCCResultType( + DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0)); + SDValue SetCC = + DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0), + Node->getOperand(1), Node->getOperand(4)); + Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC, + Node->getOperand(2), + Node->getOperand(3))); + return; + } + break; + } case ISD::FP_TO_UINT: ExpandFP_TO_UINT(Node, Results); return; @@ -833,6 +847,16 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::FP_TO_SINT_SAT: + case ISD::FP_TO_UINT_SAT: + // Expand the fpsosisat if it is scalable to prevent it from unrolling below. + if (Node->getValueType(0).isScalableVector()) { + if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) { + Results.push_back(Expanded); + return; + } + } + break; case ISD::SMULFIX: case ISD::UMULFIX: if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 78fc407e9573..3ac2a7bddc5a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -793,7 +793,7 @@ ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Emit any debug values associated with the node. if (N->getHasDebugValue()) { MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - for (auto DV : DAG->GetDbgValues(N)) { + for (auto *DV : DAG->GetDbgValues(N)) { if (!DV->isEmitted()) if (auto *DbgMI = Emitter.EmitDbgValue(DV, VRBaseMap)) BB->insert(InsertPos, DbgMI); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 2a10157b404e..5166db033c62 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -749,7 +749,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, // source order number as N. MachineBasicBlock *BB = Emitter.getBlock(); MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); - for (auto DV : DAG->GetDbgValues(N)) { + for (auto *DV : DAG->GetDbgValues(N)) { if (DV->isEmitted()) continue; unsigned DVOrder = DV->getOrder(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index c8d0f5faf647..441437351852 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" @@ -602,7 +603,7 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef<SDValue> Ops) { - for (auto& Op : Ops) { + for (const auto &Op : Ops) { ID.AddPointer(Op.getNode()); ID.AddInteger(Op.getResNo()); } @@ -611,7 +612,7 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, /// AddNodeIDOperands - Various routines for adding operands to the NodeID data. static void AddNodeIDOperands(FoldingSetNodeID &ID, ArrayRef<SDUse> Ops) { - for (auto& Op : Ops) { + for (const auto &Op : Ops) { ID.AddPointer(Op.getNode()); ID.AddInteger(Op.getResNo()); } @@ -2711,16 +2712,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, SubDemandedElts &= ScaledDemandedElts; if (!isSplatValue(Src, SubDemandedElts, SubUndefElts, Depth + 1)) return false; - - // Here we can't do "MatchAnyBits" operation merge for undef bits. - // Because some operation only use part value of the source. - // Take llvm.fshl.* for example: - // t1: v4i32 = Constant:i32<12>, undef:i32, Constant:i32<12>, undef:i32 - // t2: v2i64 = bitcast t1 - // t5: v2i64 = fshl t3, t4, t2 - // We can not convert t2 to {i64 undef, i64 undef} - UndefElts |= APIntOps::ScaleBitMask(SubUndefElts, NumElts, - /*MatchAllBits=*/true); + // TODO: Add support for merging sub undef elements. + if (!SubUndefElts.isZero()) + return false; } return true; } @@ -2947,6 +2941,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, unsigned Opcode = Op.getOpcode(); switch (Opcode) { + case ISD::MERGE_VALUES: + return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts, + Depth + 1); case ISD::BUILD_VECTOR: // Collect the known bits that are shared by every demanded vector element. Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -3219,12 +3216,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::mulhs(Known, Known2); break; } - case ISD::UDIV: { - Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); - Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = KnownBits::udiv(Known, Known2); - break; - } case ISD::AVGCEILU: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3339,6 +3330,38 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero |= Known2.Zero; } break; + case ISD::SHL_PARTS: + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: { + assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); + + // Collect lo/hi source values and concatenate. + // TODO: Would a KnownBits::concatBits helper be useful? + unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits(); + unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits(); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = Known.anyext(LoBits + HiBits); + Known.insertBits(Known2, LoBits); + + // Collect shift amount. + Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); + + if (Opcode == ISD::SHL_PARTS) + Known = KnownBits::shl(Known, Known2); + else if (Opcode == ISD::SRA_PARTS) + Known = KnownBits::ashr(Known, Known2); + else // if (Opcode == ISD::SRL_PARTS) + Known = KnownBits::lshr(Known, Known2); + + // TODO: Minimum shift low/high bits are known zero. + + if (Op.getResNo() == 0) + Known = Known.extractBits(LoBits, 0); + else + Known = Known.extractBits(HiBits, LoBits); + break; + } case ISD::SIGN_EXTEND_INREG: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -3570,6 +3593,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::computeForAddCarry(Known, Known2, Carry); break; } + case ISD::UDIV: { + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::udiv(Known, Known2); + break; + } case ISD::SREM: { Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3925,7 +3954,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::AssertZext: Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); return VTBits-Tmp; - + case ISD::MERGE_VALUES: + return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts, + Depth + 1); case ISD::BUILD_VECTOR: Tmp = VTBits; for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { @@ -6105,8 +6136,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(N1.getValueType().isVector() == VT.isVector() && "FP_TO_*INT_SAT type should be vector iff the operand type is " "vector!"); - assert((!VT.isVector() || VT.getVectorNumElements() == - N1.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || VT.getVectorElementCount() == + N1.getValueType().getVectorElementCount()) && "Vector element counts must match in FP_TO_*INT_SAT"); assert(!cast<VTSDNode>(N2)->getVT().isVector() && "Type to saturate to must be a scalar."); @@ -6719,7 +6750,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Turn a memcpy of undef to nop. // FIXME: We need to honor volatile even is Src is undef. if (Src.isUndef()) @@ -6782,6 +6813,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, AAMDNodes NewAAInfo = AAInfo; NewAAInfo.TBAA = NewAAInfo.TBAAStruct = nullptr; + const Value *SrcVal = SrcPtrInfo.V.dyn_cast<const Value *>(); + bool isConstant = + AA && SrcVal && + AA->pointsToConstantMemory(MemoryLocation(SrcVal, Size, AAInfo)); + MachineMemOperand::Flags MMOFlags = isVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; SmallVector<SDValue, 16> OutLoadChains; @@ -6843,6 +6879,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, MachineMemOperand::Flags SrcMMOFlags = MMOFlags; if (isDereferenceable) SrcMMOFlags |= MachineMemOperand::MODereferenceable; + if (isConstant) + SrcMMOFlags |= MachineMemOperand::MOInvariant; Value = DAG.getExtLoad( ISD::EXTLOAD, dl, NVT, Chain, @@ -7131,7 +7169,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -7142,7 +7180,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Result = getMemcpyLoadsAndStores( *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, - isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo); + isVol, false, DstPtrInfo, SrcPtrInfo, AAInfo, AA); if (Result.getNode()) return Result; } @@ -7161,9 +7199,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, // use a (potentially long) sequence of loads and stores. if (AlwaysInline) { assert(ConstantSize && "AlwaysInline requires a constant size!"); - return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, - ConstantSize->getZExtValue(), Alignment, - isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo); + return getMemcpyLoadsAndStores( + *this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, + isVol, true, DstPtrInfo, SrcPtrInfo, AAInfo, AA); } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); @@ -7245,7 +7283,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, - const AAMDNodes &AAInfo) { + const AAMDNodes &AAInfo, AAResults *AA) { // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); @@ -8904,7 +8942,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } #ifndef NDEBUG - for (auto &Op : Ops) + for (const auto &Op : Ops) assert(Op.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); #endif @@ -8928,6 +8966,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "True and False arms of SelectCC must have same type!"); assert(Ops[2].getValueType() == VT && "select_cc node must be of same type as true and false value!"); + assert((!Ops[0].getValueType().isVector() || + Ops[0].getValueType().getVectorElementCount() == + VT.getVectorElementCount()) && + "Expected select_cc with vector result to have the same sized " + "comparison type!"); break; case ISD::BR_CC: assert(NumOps == 5 && "BR_CC takes 5 operands!"); @@ -9018,12 +9061,34 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, return getNode(Opcode, DL, VTList.VTs[0], Ops, Flags); #ifndef NDEBUG - for (auto &Op : Ops) + for (const auto &Op : Ops) assert(Op.getOpcode() != ISD::DELETED_NODE && "Operand is DELETED_NODE!"); #endif switch (Opcode) { + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: { + assert(VTList.NumVTs == 2 && Ops.size() == 2 && + "Invalid add/sub overflow op!"); + assert(VTList.VTs[0].isInteger() && VTList.VTs[1].isInteger() && + Ops[0].getValueType() == Ops[1].getValueType() && + Ops[0].getValueType() == VTList.VTs[0] && + "Binary operator types must match!"); + SDValue N1 = Ops[0], N2 = Ops[1]; + canonicalizeCommutativeBinop(Opcode, N1, N2); + + // (X +- 0) -> X with zero-overflow. + ConstantSDNode *N2CV = isConstOrConstSplat(N2, /*AllowUndefs*/ false, + /*AllowTruncation*/ true); + if (N2CV && N2CV->isZero()) { + SDValue ZeroOverFlow = getConstant(0, DL, VTList.VTs[1]); + return getNode(ISD::MERGE_VALUES, DL, VTList, {N1, ZeroOverFlow}, Flags); + } + break; + } case ISD::STRICT_FP_EXTEND: assert(VTList.NumVTs == 2 && Ops.size() == 2 && "Invalid STRICT_FP_EXTEND!"); @@ -9914,7 +9979,7 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) { return; SmallVector<SDDbgValue *, 2> ClonedDVs; - for (auto DV : GetDbgValues(&N)) { + for (auto *DV : GetDbgValues(&N)) { if (DV->isInvalidated()) continue; switch (N.getOpcode()) { @@ -10268,7 +10333,7 @@ bool SelectionDAG::calculateDivergence(SDNode *N) { } if (TLI->isSDNodeSourceOfDivergence(N, FLI, DA)) return true; - for (auto &Op : N->ops()) { + for (const auto &Op : N->ops()) { if (Op.Val.getValueType() != MVT::Other && Op.getNode()->isDivergent()) return true; } @@ -10298,7 +10363,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { } for (size_t I = 0; I != Order.size(); ++I) { SDNode *N = Order[I]; - for (auto U : N->uses()) { + for (auto *U : N->uses()) { unsigned &UnsortedOps = Degree[U]; if (0 == --UnsortedOps) Order.push_back(U); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fe3c38ec590d..35650b9bd00e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1789,7 +1789,7 @@ static void findWasmUnwindDestinations( UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); UnwindDests.back().first->setIsEHScopeEntry(); break; - } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { // Add the catchpad handlers to the possible destinations. We don't // continue to the unwind destination of the catchswitch for wasm. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { @@ -1844,7 +1844,7 @@ static void findUnwindDestinations( UnwindDests.back().first->setIsEHScopeEntry(); UnwindDests.back().first->setIsEHFuncletEntry(); break; - } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + } else if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { // Add the catchpad handlers to the possible destinations. for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); @@ -2990,14 +2990,20 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { CopyToExportRegsIfNeeded(&I); // Retrieve successors. + SmallPtrSet<BasicBlock *, 8> Dests; + Dests.insert(I.getDefaultDest()); MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()]; // Update successor info. addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) { - MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)]; - addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + BasicBlock *Dest = I.getIndirectDest(i); + MachineBasicBlock *Target = FuncInfo.MBBMap[Dest]; Target->setIsInlineAsmBrIndirectTarget(); + Target->setHasAddressTaken(); + // Don't add duplicate machine successors. + if (Dests.insert(Dest).second) + addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); } CallBrMBB->normalizeSuccProbs(); @@ -4075,6 +4081,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { return; bool isVolatile = I.isVolatile(); + MachineMemOperand::Flags MMOFlags = + TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); SDValue Root; bool ConstantMemory = false; @@ -4091,6 +4099,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // Do not serialize (non-volatile) loads of constant memory with anything. Root = DAG.getEntryNode(); ConstantMemory = true; + MMOFlags |= MachineMemOperand::MOInvariant; + + // FIXME: pointsToConstantMemory probably does not imply dereferenceable, + // but the previous usage implied it did. Probably should check + // isDereferenceableAndAlignedPointer. + MMOFlags |= MachineMemOperand::MODereferenceable; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); @@ -4110,9 +4124,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); EVT PtrVT = Ptr.getValueType(); - MachineMemOperand::Flags MMOFlags - = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); - unsigned ChainI = 0; for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // Serializing loads here may result in excessive register pressure, and @@ -5766,7 +5777,7 @@ static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) { ->getCalledFunction() ->getIntrinsicID() == Intrinsic::call_preallocated_setup && "expected call_preallocated_setup Value"); - for (auto *U : PreallocatedSetup->users()) { + for (const auto *U : PreallocatedSetup->users()) { auto *UseCall = cast<CallBase>(U); const Function *Fn = UseCall->getCalledFunction(); if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) { @@ -5859,11 +5870,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, - /* AlwaysInline */ false, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + Root, sdl, Op1, Op2, Op3, Alignment, isVol, + /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -5881,11 +5891,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. - SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, - /* AlwaysInline */ true, isTC, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + SDValue MC = DAG.getMemcpy( + getRoot(), sdl, Dst, Src, Size, Alignment, isVol, + /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1)), I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MC); return; } @@ -5940,7 +5949,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)), - I.getAAMetadata()); + I.getAAMetadata(), AA); updateDAGForMaybeTailCall(MM); return; } @@ -8855,7 +8864,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, } break; - case InlineAsm::isInput: { + case InlineAsm::isInput: + case InlineAsm::isLabel: { SDValue InOperandVal = OpInfo.CallOperand; if (OpInfo.isMatchingInputConstraint()) { @@ -9295,19 +9305,18 @@ void SelectionDAGBuilder::populateCallLoweringInfo( static void addStackMapLiveVars(const CallBase &Call, unsigned StartIdx, const SDLoc &DL, SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { - for (unsigned i = StartIdx, e = Call.arg_size(); i != e; ++i) { - SDValue OpVal = Builder.getValue(Call.getArgOperand(i)); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { - Ops.push_back( - Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - Ops.push_back( - Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64)); - } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { - const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); - Ops.push_back(Builder.DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getFrameIndexTy(Builder.DAG.getDataLayout()))); - } else - Ops.push_back(OpVal); + SelectionDAG &DAG = Builder.DAG; + for (unsigned I = StartIdx; I < Call.arg_size(); I++) { + SDValue Op = Builder.getValue(Call.getArgOperand(I)); + + // Things on the stack are pointer-typed, meaning that they are already + // legal and can be emitted directly to target nodes. + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + Ops.push_back(DAG.getTargetFrameIndex(FI->getIndex(), Op.getValueType())); + } else { + // Otherwise emit a target independent node to be legalised. + Ops.push_back(Builder.getValue(Call.getArgOperand(I))); + } } } @@ -9359,20 +9368,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Ops.push_back(ShadConst); // Add the live variables. - for (unsigned I = 2; I < CI.arg_size(); I++) { - SDValue Op = getValue(CI.getArgOperand(I)); - - // Things on the stack are pointer-typed, meaning that they are already - // legal and can be emitted directly to target nodes. - if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - Ops.push_back(DAG.getTargetFrameIndex( - FI->getIndex(), TLI.getFrameIndexTy(DAG.getDataLayout()))); - } else { - // Otherwise emit a target independent node to be legalised. - Ops.push_back(getValue(CI.getArgOperand(I))); - } - } + addStackMapLiveVars(CI, 2, DL, Ops, *this); // Create the STACKMAP node. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -9449,6 +9445,19 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, // Replace the target specific call node with the patchable intrinsic. SmallVector<SDValue, 8> Ops; + // Push the chain. + Ops.push_back(*(Call->op_begin())); + + // Optionally, push the glue (if any). + if (HasGlue) + Ops.push_back(*(Call->op_end() - 1)); + + // Push the register mask info. + if (HasGlue) + Ops.push_back(*(Call->op_end() - 2)); + else + Ops.push_back(*(Call->op_end() - 1)); + // Add the <id> and <numBytes> constants. SDValue IDVal = getValue(CB.getArgOperand(PatchPointOpers::IDPos)); Ops.push_back(DAG.getTargetConstant( @@ -9477,27 +9486,13 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) Ops.push_back(getValue(CB.getArgOperand(i))); - // Push the arguments from the call instruction up to the register mask. + // Push the arguments from the call instruction. SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; Ops.append(Call->op_begin() + 2, e); // Push live variables for the stack map. addStackMapLiveVars(CB, NumMetaOpers + NumArgs, dl, Ops, *this); - // Push the register mask info. - if (HasGlue) - Ops.push_back(*(Call->op_end()-2)); - else - Ops.push_back(*(Call->op_end()-1)); - - // Push the chain (this is originally the first operand of the call, but - // becomes now the last or second to last operand). - Ops.push_back(*(Call->op_begin())); - - // Push the glue flag (last operand). - if (HasGlue) - Ops.push_back(*(Call->op_end()-1)); - SDVTList NodeTys; if (IsAnyRegCC && HasDef) { // Create the return types based on the intrinsic definition @@ -9514,13 +9509,12 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); // Replace the target specific call node with a PATCHPOINT node. - MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, - dl, NodeTys, Ops); + SDValue PPV = DAG.getNode(ISD::PATCHPOINT, dl, NodeTys, Ops); // Update the NodeMap. if (HasDef) { if (IsAnyRegCC) - setValue(&CB, SDValue(MN, 0)); + setValue(&CB, SDValue(PPV.getNode(), 0)); else setValue(&CB, Result.first); } @@ -9531,10 +9525,10 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, // value. if (IsAnyRegCC && HasDef) { SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; - SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + SDValue To[] = {PPV.getValue(1), PPV.getValue(2)}; DAG.ReplaceAllUsesOfValuesWith(From, To, 2); } else - DAG.ReplaceAllUsesWith(Call, MN); + DAG.ReplaceAllUsesWith(Call, PPV.getNode()); DAG.DeleteNode(Call); // Inform the Frame Information that we have a patchpoint in this function. diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 9df0b64c26c3..6ba01664e756 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -488,6 +488,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; case ISD::STACKMAP: return "stackmap"; + case ISD::PATCHPOINT: + return "patchpoint"; // Vector Predication #define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \ diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 7f453f081982..d46a0a23cca3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -2193,8 +2193,27 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, + SDValue OpVal, SDLoc DL) { + SDNode *OpNode = OpVal.getNode(); + + // FrameIndex nodes should have been directly emitted to TargetFrameIndex + // nodes at DAG-construction time. + assert(OpNode->getOpcode() != ISD::FrameIndex); + + if (OpNode->getOpcode() == ISD::Constant) { + Ops.push_back( + CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); + Ops.push_back( + CurDAG->getTargetConstant(cast<ConstantSDNode>(OpNode)->getZExtValue(), + DL, OpVal.getValueType())); + } else { + Ops.push_back(OpVal); + } +} + void SelectionDAGISel::Select_STACKMAP(SDNode *N) { - std::vector<SDValue> Ops; + SmallVector<SDValue, 32> Ops; auto *It = N->op_begin(); SDLoc DL(N); @@ -2213,24 +2232,8 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) { Ops.push_back(Shad); // Live variable operands. - for (; It != N->op_end(); It++) { - SDNode *OpNode = It->getNode(); - SDValue O; - - // FrameIndex nodes should have been directly emitted to TargetFrameIndex - // nodes at DAG-construction time. - assert(OpNode->getOpcode() != ISD::FrameIndex); - - if (OpNode->getOpcode() == ISD::Constant) { - Ops.push_back( - CurDAG->getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64)); - O = CurDAG->getTargetConstant( - cast<ConstantSDNode>(OpNode)->getZExtValue(), DL, It->getValueType()); - } else { - O = *It; - } - Ops.push_back(O); - } + for (; It != N->op_end(); It++) + pushStackMapLiveVariable(Ops, *It, DL); Ops.push_back(Chain); Ops.push_back(InFlag); @@ -2239,6 +2242,57 @@ void SelectionDAGISel::Select_STACKMAP(SDNode *N) { CurDAG->SelectNodeTo(N, TargetOpcode::STACKMAP, NodeTys, Ops); } +void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) { + SmallVector<SDValue, 32> Ops; + auto *It = N->op_begin(); + SDLoc DL(N); + + // Cache arguments that will be moved to the end in the target node. + SDValue Chain = *It++; + Optional<SDValue> Glue; + if (It->getValueType() == MVT::Glue) + Glue = *It++; + SDValue RegMask = *It++; + + // <id> operand. + SDValue ID = *It++; + assert(ID.getValueType() == MVT::i64); + Ops.push_back(ID); + + // <numShadowBytes> operand. + SDValue Shad = *It++; + assert(Shad.getValueType() == MVT::i32); + Ops.push_back(Shad); + + // Add the callee. + Ops.push_back(*It++); + + // Add <numArgs>. + SDValue NumArgs = *It++; + assert(NumArgs.getValueType() == MVT::i32); + Ops.push_back(NumArgs); + + // Calling convention. + Ops.push_back(*It++); + + // Push the args for the call. + for (uint64_t I = cast<ConstantSDNode>(NumArgs)->getZExtValue(); I != 0; I--) + Ops.push_back(*It++); + + // Now push the live variables. + for (; It != N->op_end(); It++) + pushStackMapLiveVariable(Ops, *It, DL); + + // Finally, the regmask, chain and (if present) glue are moved to the end. + Ops.push_back(RegMask); + Ops.push_back(Chain); + if (Glue.has_value()) + Ops.push_back(Glue.value()); + + SDVTList NodeTys = N->getVTList(); + CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops); +} + /// GetVBR - decode a vbr encoding whose top bit is set. LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { @@ -2796,6 +2850,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::STACKMAP: Select_STACKMAP(NodeToMatch); return; + case ISD::PATCHPOINT: + Select_PATCHPOINT(NodeToMatch); + return; } assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 3061158eea30..c5c093ae228f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -169,8 +169,14 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { - const auto &RelocationMap = - Builder.FuncInfo.StatepointRelocationMaps[Relocate->getStatepoint()]; + const Value *Statepoint = Relocate->getStatepoint(); + assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(Statepoint)) + return None; + + const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps + [cast<GCStatepointInst>(Statepoint)]; auto It = RelocationMap.find(Relocate); if (It == RelocationMap.end()) @@ -193,7 +199,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, if (const PHINode *Phi = dyn_cast<PHINode>(Val)) { Optional<int> MergedResult = None; - for (auto &IncomingValue : Phi->incoming_values()) { + for (const auto &IncomingValue : Phi->incoming_values()) { Optional<int> SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); if (!SpillSlot) @@ -569,9 +575,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, // We cannot assing them to VRegs. SmallSet<SDValue, 8> LPadPointers; if (!UseRegistersForGCPointersInLandingPad) - if (auto *StInvoke = dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) { + if (const auto *StInvoke = + dyn_cast_or_null<InvokeInst>(SI.StatepointInstr)) { LandingPadInst *LPI = StInvoke->getLandingPadInst(); - for (auto *Relocate : SI.GCRelocates) + for (const auto *Relocate : SI.GCRelocates) if (Relocate->getOperand(0) == LPI) { LPadPointers.insert(Builder.getValue(Relocate->getBasePtr())); LPadPointers.insert(Builder.getValue(Relocate->getDerivedPtr())); @@ -739,7 +746,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG - for (auto *Reloc : SI.GCRelocates) + for (const auto *Reloc : SI.GCRelocates) if (Reloc->getParent() == SI.StatepointInstr->getParent()) StatepointLowering.scheduleRelocCall(*Reloc); #endif @@ -1017,7 +1024,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( static std::pair<const GCResultInst*, const GCResultInst*> getGCResultLocality(const GCStatepointInst &S) { std::pair<const GCResultInst *, const GCResultInst*> Res(nullptr, nullptr); - for (auto *U : S.users()) { + for (const auto *U : S.users()) { auto *GRI = dyn_cast<GCResultInst>(U); if (!GRI) continue; @@ -1195,9 +1202,13 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle( void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { // The result value of the gc_result is simply the result of the actual // call. We've already emitted this, so just grab the value. - const GCStatepointInst *SI = CI.getStatepoint(); + const Value *SI = CI.getStatepoint(); + assert((isa<GCStatepointInst>(SI) || isa<UndefValue>(SI)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(SI)) + return; - if (SI->getParent() == CI.getParent()) { + if (cast<GCStatepointInst>(SI)->getParent() == CI.getParent()) { setValue(&CI, getValue(SI)); return; } @@ -1215,12 +1226,18 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) { } void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { + const Value *Statepoint = Relocate.getStatepoint(); #ifndef NDEBUG // Consistency check // We skip this check for relocates not in the same basic block as their // statepoint. It would be too expensive to preserve validation info through // different basic blocks. - if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) + assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) && + "GetStatepoint must return one of two types"); + if (isa<UndefValue>(Statepoint)) + return; + + if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent()) StatepointLowering.relocCallVisited(Relocate); auto *Ty = Relocate.getType()->getScalarType(); @@ -1230,14 +1247,15 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { const Value *DerivedPtr = Relocate.getDerivedPtr(); auto &RelocationMap = - FuncInfo.StatepointRelocationMaps[Relocate.getStatepoint()]; + FuncInfo.StatepointRelocationMaps[cast<GCStatepointInst>(Statepoint)]; auto SlotIt = RelocationMap.find(&Relocate); assert(SlotIt != RelocationMap.end() && "Relocating not lowered gc value"); const RecordType &Record = SlotIt->second; // If relocation was done via virtual register.. if (Record.type == RecordType::SDValueNode) { - assert(Relocate.getStatepoint()->getParent() == Relocate.getParent() && + assert(cast<GCStatepointInst>(Statepoint)->getParent() == + Relocate.getParent() && "Nonlocal gc.relocate mapped via SDValue"); SDValue SDV = StatepointLowering.getLocation(getValue(DerivedPtr)); assert(SDV.getNode() && "empty SDValue"); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 66389a57f780..cd4f0ae42bcd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1056,13 +1056,13 @@ bool TargetLowering::SimplifyDemandedBits( // TODO: We can probably do more work on calculating the known bits and // simplifying the operations for scalable vectors, but for now we just // bail out. - if (Op.getValueType().isScalableVector()) + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) return false; bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); unsigned NumElts = OriginalDemandedElts.getBitWidth(); - assert((!Op.getValueType().isVector() || - NumElts == Op.getValueType().getVectorNumElements()) && + assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) && "Unexpected vector size"); APInt DemandedBits = OriginalDemandedBits; @@ -1088,7 +1088,6 @@ bool TargetLowering::SimplifyDemandedBits( } // Other users may use these bits. - EVT VT = Op.getValueType(); if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { // If not at the root, Just compute the Known bits to @@ -1468,6 +1467,33 @@ bool TargetLowering::SimplifyDemandedBits( } } + // (or (and X, C1), (and (or X, Y), C2)) -> (or (and X, C1|C2), (and Y, C2)) + // TODO: Use SimplifyMultipleUseDemandedBits to peek through masks. + if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::AND && + Op0->hasOneUse() && Op1->hasOneUse()) { + // Attempt to match all commutations - m_c_Or would've been useful! + for (int I = 0; I != 2; ++I) { + SDValue X = Op.getOperand(I).getOperand(0); + SDValue C1 = Op.getOperand(I).getOperand(1); + SDValue Alt = Op.getOperand(1 - I).getOperand(0); + SDValue C2 = Op.getOperand(1 - I).getOperand(1); + if (Alt.getOpcode() == ISD::OR) { + for (int J = 0; J != 2; ++J) { + if (X == Alt.getOperand(J)) { + SDValue Y = Alt.getOperand(1 - J); + if (SDValue C12 = TLO.DAG.FoldConstantArithmetic(ISD::OR, dl, VT, + {C1, C2})) { + SDValue MaskX = TLO.DAG.getNode(ISD::AND, dl, VT, X, C12); + SDValue MaskY = TLO.DAG.getNode(ISD::AND, dl, VT, Y, C2); + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::OR, dl, VT, MaskX, MaskY)); + } + } + } + } + } + } + Known |= Known2; break; } @@ -1500,7 +1526,7 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); - ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts); + ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts); if (C) { // If one side is a constant, and all of the set bits in the constant are // also known set on the other side, turn this into an AND, as we know @@ -1521,6 +1547,32 @@ bool TargetLowering::SimplifyDemandedBits( SDValue New = TLO.DAG.getNOT(dl, Op0, VT); return TLO.CombineTo(Op, New); } + + unsigned Op0Opcode = Op0.getOpcode(); + if ((Op0Opcode == ISD::SRL || Op0Opcode == ISD::SHL) && Op0.hasOneUse()) { + if (ConstantSDNode *ShiftC = + isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) { + // Don't crash on an oversized shift. We can not guarantee that a + // bogus shift has been simplified to undef. + if (ShiftC->getAPIntValue().ult(BitWidth)) { + uint64_t ShiftAmt = ShiftC->getZExtValue(); + APInt Ones = APInt::getAllOnes(BitWidth); + Ones = Op0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) + : Ones.lshr(ShiftAmt); + const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo(); + if ((DemandedBits & C->getAPIntValue()) == (DemandedBits & Ones) && + TLI.isDesirableToCommuteXorWithShift(Op.getNode())) { + // If the xor constant is a demanded mask, do a 'not' before the + // shift: + // xor (X << ShiftC), XorC --> (not X) << ShiftC + // xor (X >> ShiftC), XorC --> (not X) >> ShiftC + SDValue Not = TLO.DAG.getNOT(dl, Op0.getOperand(0), VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(Op0Opcode, dl, VT, Not, + Op0.getOperand(1))); + } + } + } + } } // If we can't turn this into a 'not', try to shrink the constant. @@ -1723,6 +1775,26 @@ bool TargetLowering::SimplifyDemandedBits( if ((ShAmt < DemandedBits.getActiveBits()) && ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) return true; + } else { + // This is a variable shift, so we can't shift the demand mask by a known + // amount. But if we are not demanding high bits, then we are not + // demanding those bits from the pre-shifted operand either. + if (unsigned CTLZ = DemandedBits.countLeadingZeros()) { + APInt DemandedFromOp(APInt::getLowBitsSet(BitWidth, BitWidth - CTLZ)); + if (SimplifyDemandedBits(Op0, DemandedFromOp, DemandedElts, Known, TLO, + Depth + 1)) { + SDNodeFlags Flags = Op.getNode()->getFlags(); + if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) { + // Disable the nsw and nuw flags. We can no longer guarantee that we + // won't wrap after simplification. + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + Op->setFlags(Flags); + } + return true; + } + Known.resetAll(); + } } // If we are only demanding sign bits then we can use the shift source @@ -3292,6 +3364,12 @@ bool TargetLowering::SimplifyDemandedVectorElts( TLO, Depth + 1)) return true; + // If every element pair has a zero/undef then just fold to zero. + // fold (and x, undef) -> 0 / (and x, 0) -> 0 + // fold (mul x, undef) -> 0 / (mul x, 0) -> 0 + if (DemandedElts.isSubsetOf(SrcZero | KnownZero | SrcUndef | KnownUndef)) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); + // If either side has a zero element, then the result element is zero, even // if the other is an UNDEF. // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros @@ -3301,7 +3379,6 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef &= ~KnownZero; // Attempt to avoid multi-use ops if we don't need anything from them. - // TODO - use KnownUndef to relax the demandedelts? if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; @@ -5204,6 +5281,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, // ConstraintOperands list. unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. unsigned ResNo = 0; // ResNo - The result number of the next output. + unsigned LabelNo = 0; // LabelNo - CallBr indirect dest number. for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { ConstraintOperands.emplace_back(std::move(CI)); @@ -5240,6 +5318,14 @@ TargetLowering::ParseConstraints(const DataLayout &DL, case InlineAsm::isInput: OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); break; + case InlineAsm::isLabel: + OpInfo.CallOperandVal = + cast<CallBrInst>(&Call)->getBlockAddressForIndirectDest(LabelNo); + OpInfo.ConstraintVT = + getAsmOperandValueType(DL, OpInfo.CallOperandVal->getType()) + .getSimpleVT(); + ++LabelNo; + continue; case InlineAsm::isClobber: // Nothing to do. break; @@ -5852,22 +5938,22 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. const APInt& Divisor = C->getAPIntValue(); - UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor); + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor); unsigned PreShift = 0, PostShift = 0; // If the divisor is even, we can avoid using the expensive fixup by // shifting the divided value upfront. - if (magics.IsAdd != 0 && !Divisor[0]) { + if (magics.IsAdd && !Divisor[0]) { PreShift = Divisor.countTrailingZeros(); // Get magic number for the shifted divisor. - magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); - assert(magics.IsAdd == 0 && "Should use cheap fixup now"); + magics = + UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(!magics.IsAdd && "Should use cheap fixup now"); } - APInt Magic = magics.Magic; - unsigned SelNPQ; - if (magics.IsAdd == 0 || Divisor.isOne()) { + if (!magics.IsAdd || Divisor.isOne()) { assert(magics.ShiftAmount < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); PostShift = magics.ShiftAmount; @@ -5878,7 +5964,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, } PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT)); - MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT)); + MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT)); NPQFactors.push_back( DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) : APInt::getZero(EltBits), |
