diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2022-03-20 11:40:34 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2022-05-14 11:43:05 +0000 |
commit | 349cc55c9796c4596a5b9904cd3281af295f878f (patch) | |
tree | 410c5a785075730a35f1272ca6a7adf72222ad03 /contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | |
parent | cb2ae6163174b90e999326ecec3699ee093a5d43 (diff) | |
parent | c0981da47d5696fe36474fcf86b4ce03ae3ff818 (diff) |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 471 |
1 files changed, 244 insertions, 227 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 7f80ce37e28a..e4a69adff05b 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Support/DivisionByConstantInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" @@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, TargetLoweringOpt &TLO) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO); } @@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, } APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth, AssumeSingleUse); @@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumDstEltBits / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { unsigned Offset = i * NumSrcEltBits; APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); - if (!Sub.isNullValue()) { + if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) if (DemandedElts[j]) @@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumSrcEltBits / NumDstEltBits; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % Scale) * NumDstEltBits; @@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::INSERT_SUBVECTOR: { - // If we don't demand the inserted subvector, return the base vector. SDValue Vec = Op.getOperand(0); SDValue Sub = Op.getOperand(1); uint64_t Idx = Op.getConstantOperandVal(2); unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); - if (DemandedElts.extractBits(NumSubElts, Idx) == 0) + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + // If we don't demand the inserted subvector, return the base vector. + if (DemandedSubElts == 0) return Vec; + // If this simply widens the lowest subvector, see if we can do it earlier. + if (Idx == 0 && Vec.isUndef()) { + if (SDValue NewSub = SimplifyMultipleUseDemandedBits( + Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), NewSub, Op.getOperand(2)); + } break; } case ISD::VECTOR_SHUFFLE: { @@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( unsigned Depth) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, Depth); @@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const { - APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits()); + APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits()); return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, Depth); } @@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits( } // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedBits/Elts to all bits. - DemandedBits = APInt::getAllOnesValue(BitWidth); - DemandedElts = APInt::getAllOnesValue(NumElts); + DemandedBits = APInt::getAllOnes(BitWidth); + DemandedElts = APInt::getAllOnes(NumElts); } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits( unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); KnownBits KnownSub, KnownSrc; if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO, @@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits( Known = KnownBits::commonBits(Known, KnownSrc); // Attempt to avoid multi-use src if we don't need anything from it. - if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || - !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() || + !DemandedSrcElts.isAllOnes()) { SDValue NewSub = SimplifyMultipleUseDemandedBits( Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1); SDValue NewSrc = SimplifyMultipleUseDemandedBits( @@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Attempt to avoid multi-use src if we don't need anything from it. - if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) { SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); if (DemandedSrc) { @@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits( // If the RHS is a constant, see if we can change it. Don't alter a -1 // constant because that's a 'not' op, and that is better for combining // and codegen. - if (!C->isAllOnesValue() && - DemandedBits.isSubsetOf(C->getAPIntValue())) { + if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) { // We're flipping all demanded bits. Flip the undemanded bits too. SDValue New = TLO.DAG.getNOT(dl, Op0, VT); return TLO.CombineTo(Op, New); @@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits( } // If we can't turn this into a 'not', try to shrink the constant. - if (!C || !C->isAllOnesValue()) + if (!C || !C->isAllOnes()) if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; @@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits( // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedBits.isOneValue()) + if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); if (const APInt *SA = @@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits( Known.One.setHighBits(ShAmt); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); if (DemandedOp0) { @@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits( // If only 1 bit is demanded, replace with PARITY as long as we're before // op legalization. // FIXME: Limit to scalars for now. - if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector()) + if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, Op.getOperand(0))); @@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. if (DemandedBits.isSignMask()) { - unsigned NumSignBits = - TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); - bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; + unsigned MinSignedBits = + TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1); + bool AlreadySignExtended = ExVTBits >= MinSignedBits; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. if (!AlreadySignExtended) { @@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits( // Demand the bits from every vector element without a constant index. unsigned NumSrcElts = SrcEltCnt.getFixedValue(); - APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) if (CIdx->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue()); @@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedSrcBits.isAllOnesValue() || - !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) { if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) { SDValue NewOp = @@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits( TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { unsigned Offset = i * NumSrcEltBits; APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); - if (!Sub.isNullValue()) { + if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) if (DemandedElts[j]) @@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits( TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumSrcEltBits / BitWidth; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % Scale) * BitWidth; @@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits( } // Attempt to avoid multi-use ops if we don't need anything from them. - if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits( // is probably not useful (and could be detrimental). ConstantSDNode *C = isConstOrConstSplat(Op1); APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ); - if (C && !C->isAllOnesValue() && !C->isOne() && - (C->getAPIntValue() | HighMask).isAllOnesValue()) { + if (C && !C->isAllOnes() && !C->isOne() && + (C->getAPIntValue() | HighMask).isAllOnes()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. @@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, return SDValue(); }; - APInt KnownUndef = APInt::getNullValue(NumElts); + APInt KnownUndef = APInt::getZero(NumElts); for (unsigned i = 0; i != NumElts; ++i) { // If both inputs for this element are either constant or undef and match // the element type, compute the constant/undef result for this element of @@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( unsigned NumElts = DemandedElts.getBitWidth(); assert(VT.isVector() && "Expected vector op"); - KnownUndef = KnownZero = APInt::getNullValue(NumElts); + KnownUndef = KnownZero = APInt::getZero(NumElts); // TODO: For now we assume we know nothing about scalable vectors. if (VT.isScalableVector()) @@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts( return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, TLO, Depth + 1); - APInt SrcZero, SrcUndef; - APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); + APInt SrcDemandedElts, SrcZero, SrcUndef; // Bitcast from 'large element' src vector to 'small element' vector, we // must demand a source element if any DemandedElt maps to it. if ((NumElts % NumSrcElts) == 0) { unsigned Scale = NumElts / NumSrcElts; - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SrcDemandedElts.setBit(i / Scale); - + SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // TODO - bigendian once we have test coverage. if (TLO.DAG.getDataLayout().isLittleEndian()) { unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); - APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits); + APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Ofs = (i % Scale) * EltSizeInBits; @@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // of this vector. if ((NumSrcElts % NumElts) == 0) { unsigned Scale = NumSrcElts / NumElts; - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale); - + SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( // the output element will be as well, assuming it was demanded. for (unsigned i = 0; i != NumElts; ++i) { if (DemandedElts[i]) { - if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue()) + if (SrcZero.extractBits(Scale, i * Scale).isAllOnes()) KnownZero.setBit(i); - if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue()) + if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes()) KnownUndef.setBit(i); } } @@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } case ISD::BUILD_VECTOR: { // Check all elements and simplify any unused elements with UNDEF. - if (!DemandedElts.isAllOnesValue()) { + if (!DemandedElts.isAllOnes()) { // Don't simplify BROADCASTS. if (llvm::any_of(Op->op_values(), [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { @@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); APInt SubUndef, SubZero; if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO, @@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero.insertBits(SubZero, Idx); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedSrcElts.isAllOnesValue() || - !DemandedSubElts.isAllOnesValue()) { + if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) { SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( Src, DemandedSrcElts, TLO.DAG, Depth + 1); SDValue NewSub = SimplifyMultipleUseDemandedVectorElts( @@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero = SrcZero.extractBits(NumElts, Idx); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedElts.isAllOnesValue()) { + if (!DemandedElts.isAllOnes()) { SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( Src, DemandedSrcElts, TLO.DAG, Depth + 1); if (NewSrc) { @@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts( if (DemandedElts.isSubsetOf(KnownUndef)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); KnownUndef.clearAllBits(); + + // zext - if we just need the bottom element then we can mask: + // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. + if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() && + Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) && + Op.getValueSizeInBits() == Src.getValueSizeInBits()) { + SDLoc DL(Op); + EVT SrcVT = Src.getValueType(); + EVT SrcSVT = SrcVT.getScalarType(); + SmallVector<SDValue> MaskElts; + MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT)); + MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT)); + SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts); + if (SDValue Fold = TLO.DAG.FoldConstantArithmetic( + ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) { + Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold); + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold)); + } + } } break; } @@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? - if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? - if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? - if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; } else { KnownBits Known; - APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits); + APInt DemandedBits = APInt::getAllOnes(EltSizeInBits); if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known, TLO, Depth, AssumeSingleUse)) return true; @@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { case UndefinedBooleanContent: return CVal[0]; case ZeroOrOneBooleanContent: - return CVal.isOneValue(); + return CVal.isOne(); case ZeroOrNegativeOneBooleanContent: - return CVal.isAllOnesValue(); + return CVal.isAllOnes(); } llvm_unreachable("Invalid boolean contents"); @@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) return !CN->getAPIntValue()[0]; - return CN->isNullValue(); + return CN->isZero(); } bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, @@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1)); case TargetLowering::UndefinedBooleanContent: case TargetLowering::ZeroOrNegativeOneBooleanContent: - return N->isAllOnesValue() && SExt; + return N->isAllOnes() && SExt; } llvm_unreachable("Unexpected enumeration."); } @@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, // Bail out if the compare operand that we want to turn into a zero is // already a zero (otherwise, infinite loop). auto *YConst = dyn_cast<ConstantSDNode>(Y); - if (YConst && YConst->isNullValue()) + if (YConst && YConst->isZero()) return SDValue(); // Transform this into: ~X & Y == 0. @@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { assert(isConstOrConstSplat(N1C) && - isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + isConstOrConstSplat(N1C)->getAPIntValue().isZero() && "Should be a comparison with 0."); assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Valid only for [in]equality comparisons."); @@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. - if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && + if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && isPowerOf2_32(N0.getScalarValueSizeInBits())) { if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) { @@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isConstFalseVal(N1C) || isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { - bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || - (!N1C->isNullValue() && Cond == ISD::SETNE); + bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) || + (!N1C->isZero() && Cond == ISD::SETNE); if (!Inverse) return TopSetCC; @@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Otherwise, make this a use of a zext. return DAG.getSetCC(dl, VT, ZextOp, DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond); - } else if ((N1C->isNullValue() || N1C->isOne()) && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + } else if ((N1C->isZero() || N1C->isOne()) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && @@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // icmp eq/ne (urem %x, %y), 0 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': // icmp eq/ne %x, 0 - if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() && + if (N0.getOpcode() == ISD::UREM && N1C->isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0)); KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1)); @@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond); } + // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0 + // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 && + N1C && N1C->isAllOnes()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, dl, OpVT), + Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE); + } + if (SDValue V = optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) return V; @@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 - if (C1.isNullValue()) + if (C1.isZero()) if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( VT, N0, N1, Cond, DCI, dl)) return CC; @@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // For example, when high 32-bits of i64 X are known clear: // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 - bool CmpZero = N1C->getAPIntValue().isNullValue(); - bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue(); + bool CmpZero = N1C->getAPIntValue().isZero(); + bool CmpNegOne = N1C->getAPIntValue().isAllOnes(); if ((CmpZero || CmpNegOne) && N0.hasOneUse()) { // Match or(lo,shl(hi,bw/2)) pattern. auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) { @@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND && N0.hasOneUse()) { if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { const APInt &AndRHSC = AndRHS->getAPIntValue(); - if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { SDValue Shift = @@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. - if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) { if (N0.getOpcode() == ISD::UREM) { if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) return Folded; @@ -5050,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, SmallVector<SDValue, 16> Shifts, Factors; auto BuildSDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; APInt Divisor = C->getAPIntValue(); unsigned Shift = Divisor.countTrailingZeros(); @@ -5152,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks; auto BuildSDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; const APInt &Divisor = C->getAPIntValue(); - APInt::ms magics = Divisor.magic(); + SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor); int NumeratorFactor = 0; int ShiftMask = -1; - if (Divisor.isOneValue() || Divisor.isAllOnesValue()) { + if (Divisor.isOne() || Divisor.isAllOnes()) { // If d is +1/-1, we just multiply the numerator by +1/-1. NumeratorFactor = Divisor.getSExtValue(); - magics.m = 0; - magics.s = 0; + magics.Magic = 0; + magics.ShiftAmount = 0; ShiftMask = 0; - } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { + } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) { // If d > 0 and m < 0, add the numerator. NumeratorFactor = 1; - } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { + } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) { // If d < 0 and m > 0, subtract the numerator. NumeratorFactor = -1; } - MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT)); + MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT)); Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT)); - Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT)); + Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT)); ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT)); return true; }; @@ -5297,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; auto BuildUDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; // FIXME: We should use a narrower constant when the upper // bits are known to be zero. const APInt& Divisor = C->getAPIntValue(); - APInt::mu magics = Divisor.magicu(); + UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor); unsigned PreShift = 0, PostShift = 0; // If the divisor is even, we can avoid using the expensive fixup by // shifting the divided value upfront. - if (magics.a != 0 && !Divisor[0]) { + if (magics.IsAdd != 0 && !Divisor[0]) { PreShift = Divisor.countTrailingZeros(); // Get magic number for the shifted divisor. - magics = Divisor.lshr(PreShift).magicu(PreShift); - assert(magics.a == 0 && "Should use cheap fixup now"); + magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(magics.IsAdd == 0 && "Should use cheap fixup now"); } - APInt Magic = magics.m; + APInt Magic = magics.Magic; unsigned SelNPQ; - if (magics.a == 0 || Divisor.isOneValue()) { - assert(magics.s < Divisor.getBitWidth() && + if (magics.IsAdd == 0 || Divisor.isOne()) { + assert(magics.ShiftAmount < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); - PostShift = magics.s; + PostShift = magics.ShiftAmount; SelNPQ = false; } else { - PostShift = magics.s - 1; + PostShift = magics.ShiftAmount - 1; SelNPQ = true; } @@ -5331,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT)); NPQFactors.push_back( DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) - : APInt::getNullValue(EltBits), + : APInt::getZero(EltBits), dl, SVT)); PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT)); UseNPQ |= SelNPQ; @@ -5511,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (CDiv->isNullValue()) + if (CDiv->isZero()) return false; const APInt &D = CDiv->getAPIntValue(); const APInt &Cmp = CCmp->getAPIntValue(); - ComparingWithAllZeros &= Cmp.isNullValue(); + ComparingWithAllZeros &= Cmp.isZero(); // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, // if C2 is not less than C1, the comparison is always false. @@ -5529,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, // If all lanes are tautological (either all divisors are ones, or divisor // is not greater than the constant we are comparing with), // we will prefer to avoid the fold. - bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; + bool TautologicalLane = D.isOne() || TautologicalInvertedLane; HadTautologicalLanes |= TautologicalLane; AllLanesAreTautological &= TautologicalLane; // If we are comparing with non-zero, we need'll need to subtract said // comparison value from the LHS. But there is no point in doing that if // every lane where we are comparing with non-zero is tautological.. - if (!Cmp.isNullValue()) + if (!Cmp.isZero()) AllComparisonsWithNonZerosAreTautological &= TautologicalLane; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); - assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate."); APInt D0 = D.lshr(K); // D is even if it has trailing zeros. HadEvenDivisor |= (K != 0); // D is a power-of-two if D0 is one. // If all divisors are power-of-two, we will prefer to avoid the fold. - AllDivisorsArePowerOfTwo &= D0.isOneValue(); + AllDivisorsArePowerOfTwo &= D0.isOne(); // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. @@ -5556,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, APInt P = D0.zext(W + 1) .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); // Q = floor((2^W - 1) u/ D) // R = ((2^W - 1) u% D) APInt Q, R; - APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); + APInt::udivrem(APInt::getAllOnes(W), D, Q, R); // If we are comparing with zero, then that comparison constant is okay, // else it may need to be one less than that. if (Cmp.ugt(R)) Q -= 1; - assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); // If the lane is tautological the result can be constant-folded. @@ -5752,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // TODO: Could support comparing with non-zero too. ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!CompTarget || !CompTarget->isNullValue()) + if (!CompTarget || !CompTarget->isZero()) return SDValue(); bool HadIntMinDivisor = false; @@ -5765,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, auto BuildSREMPattern = [&](ConstantSDNode *C) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (C->isNullValue()) + if (C->isZero()) return false; // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. @@ -5778,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, HadIntMinDivisor |= D.isMinSignedValue(); // If all divisors are ones, we will prefer to avoid the fold. - HadOneDivisor |= D.isOneValue(); - AllDivisorsAreOnes &= D.isOneValue(); + HadOneDivisor |= D.isOne(); + AllDivisorsAreOnes &= D.isOne(); // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); - assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate."); APInt D0 = D.lshr(K); if (!D.isMinSignedValue()) { @@ -5794,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // D is a power-of-two if D0 is one. This includes INT_MIN. // If all divisors are power-of-two, we will prefer to avoid the fold. - AllDivisorsArePowerOfTwo &= D0.isOneValue(); + AllDivisorsArePowerOfTwo &= D0.isOne(); // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. @@ -5802,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, APInt P = D0.zext(W + 1) .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); // A = floor((2^(W - 1) - 1) / D0) & -2^K APInt A = APInt::getSignedMaxValue(W).udiv(D0); @@ -5818,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // Q = floor((2 * A) / (2^K)) APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); - assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && + assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) && "We are expecting that A is always less than all-ones for SVT"); - assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); // If the divisor is 1 the result can be constant-folded. Likewise, we // don't care about INT_MIN lanes, those can be set to undef if appropriate. - if (D.isOneValue()) { + if (D.isOne()) { // Set P, A and K to a bogus values so we can try to splat them. P = 0; A = -1; @@ -5951,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue IntMax = DAG.getConstant( APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT); SDValue Zero = - DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); + DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT); // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); @@ -6777,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // the destination signmask can't be represented by the float, so we can // just use FP_TO_SINT directly. const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT); - APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits())); + APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits())); APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { @@ -6970,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return SDValue(); } -bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +// Only expand vector types if we have the appropriate vector bit operations. +static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) { + assert(VT.isVector() && "Expected vector type"); + unsigned Len = VT.getScalarSizeInBits(); + return TLI.isOperationLegalOrCustom(ISD::ADD, VT) && + TLI.isOperationLegalOrCustom(ISD::SUB, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT); +} + +SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -6981,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, // TODO: Add support for irregular type lengths. if (!(Len <= 128 && Len % 8 == 0)) - return false; + return SDValue(); // Only expand vector types if we have the appropriate vector bit operations. - if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) || - !isOperationLegalOrCustom(ISD::SUB, VT) || - !isOperationLegalOrCustom(ISD::SRL, VT) || - (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) || - !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) - return false; + if (VT.isVector() && !canExpandVectorCTPOP(*this, VT)) + return SDValue(); // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel @@ -7026,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), DAG.getConstant(Len - 8, dl, ShVT)); - Result = Op; - return true; + return Op; } -bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7040,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, // If the non-ZERO_UNDEF version is supported we can use that instead. if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF && - isOperationLegalOrCustom(ISD::CTLZ, VT)) { - Result = DAG.getNode(ISD::CTLZ, dl, VT, Op); - return true; - } + isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::CTLZ, dl, VT, Op); // If the ZERO_UNDEF version is supported use that and handle the zero case. if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { @@ -7052,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + return DAG.getSelect(dl, VT, SrcIsZero, DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ); - return true; } // Only expand vector types if we have the appropriate vector bit operations. + // This includes the operations needed to expand CTPOP if it isn't supported. if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || - !isOperationLegalOrCustom(ISD::CTPOP, VT) || + (!isOperationLegalOrCustom(ISD::CTPOP, VT) && + !canExpandVectorCTPOP(*this, VT)) || !isOperationLegalOrCustom(ISD::SRL, VT) || !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) - return false; + return SDValue(); // for now, we do this: // x = x | (x >> 1); @@ -7079,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp)); } Op = DAG.getNOT(dl, Op, VT); - Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); - return true; + return DAG.getNode(ISD::CTPOP, dl, VT, Op); } -bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); SDValue Op = Node->getOperand(0); @@ -7092,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, // If the non-ZERO_UNDEF version is supported we can use that instead. if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF && - isOperationLegalOrCustom(ISD::CTTZ, VT)) { - Result = DAG.getNode(ISD::CTTZ, dl, VT, Op); - return true; - } + isOperationLegalOrCustom(ISD::CTTZ, VT)) + return DAG.getNode(ISD::CTTZ, dl, VT, Op); // If the ZERO_UNDEF version is supported use that and handle the zero case. if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { @@ -7104,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + return DAG.getSelect(dl, VT, SrcIsZero, DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ); - return true; } // Only expand vector types if we have the appropriate vector bit operations. + // This includes the operations needed to expand CTPOP if it isn't supported. if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || (!isOperationLegalOrCustom(ISD::CTPOP, VT) && - !isOperationLegalOrCustom(ISD::CTLZ, VT)) || + !isOperationLegalOrCustom(ISD::CTLZ, VT) && + !canExpandVectorCTPOP(*this, VT)) || !isOperationLegalOrCustom(ISD::SUB, VT) || !isOperationLegalOrCustomOrPromote(ISD::AND, VT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) - return false; + return SDValue(); // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: @@ -7128,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) { - Result = - DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), - DAG.getNode(ISD::CTLZ, dl, VT, Tmp)); - return true; + return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp)); } - Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp); - return true; + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp); } -bool TargetLowering::expandABS(SDNode *N, SDValue &Result, - SelectionDAG &DAG, bool IsNegative) const { +SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, + bool IsNegative) const { SDLoc dl(N); EVT VT = N->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7149,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMAX, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::SMAX, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::SMAX, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // abs(x) -> umin(x,sub(0,x)) if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::UMIN, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::UMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::SMIN, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::SMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // Only expand vector types if we have the appropriate vector operations. @@ -7178,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) || (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) || !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) - return false; + return SDValue(); SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); if (!IsNegative) { SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); - } else { - // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) - SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); - Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); + return DAG.getNode(ISD::XOR, dl, VT, Add, Shift); } - return true; + + // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); } SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { @@ -7266,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const { // TODO: We can easily support i4/i2 legal types if any target ever does. if (Sz >= 8 && isPowerOf2_32(Sz)) { // Create the masks - repeating the pattern every byte. - APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0)); - APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC)); - APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA)); - APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F)); - APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33)); - APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55)); + APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F)); + APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33)); + APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55)); // BSWAP if the type is wider than a single byte. Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); - // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT)); + // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT)); + // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT)); + // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); return Tmp; @@ -7803,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, - unsigned NumSubElts) { - if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx)) - return Idx; + ElementCount SubEC) { + assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) && + "Cannot index a scalable vector within a fixed-width vector"); - EVT IdxVT = Idx.getValueType(); unsigned NElts = VecVT.getVectorMinNumElements(); - if (VecVT.isScalableVector()) { + unsigned NumSubElts = SubEC.getKnownMinValue(); + EVT IdxVT = Idx.getValueType(); + + if (VecVT.isScalableVector() && !SubEC.isScalable()) { // If this is a constant index and we know the value plus the number of the // elements in the subvector minus one is less than the minimum number of // elements then it's safe to return Idx. @@ -7856,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size. assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); - - // Scalable vectors don't need clamping as these are checked at compile time - if (SubVecVT.isFixedLengthVector()) { - assert(SubVecVT.getVectorElementType() == EltVT && - "Sub-vector must be a fixed vector with matching element type"); - Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, - SubVecVT.getVectorNumElements()); - } + assert(SubVecVT.getVectorElementType() == EltVT && + "Sub-vector must be a vector with matching element type"); + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, + SubVecVT.getVectorElementCount()); EVT IdxVT = Index.getValueType(); + if (SubVecVT.isScalableVector()) + Index = + DAG.getNode(ISD::MUL, dl, IdxVT, Index, + DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1))); Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, DAG.getConstant(EltSize, dl, IdxVT)); @@ -7921,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDLoc dl(Op); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - if (C->isNullValue() && CC == ISD::SETEQ) { + if (C->isZero() && CC == ISD::SETEQ) { EVT VT = Op.getOperand(0).getValueType(); SDValue Zext = Op.getOperand(0); if (VT.bitsLT(MVT::i32)) { @@ -7949,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT, (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED); // Scaling is unimportant for bytes, canonicalize to unscaled. - if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) { - IsScaledIndex = false; - IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; - } + if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) + return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; return IndexType; } @@ -8073,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff); } - // SatMax -> Overflow && SumDiff < 0 - // SatMin -> Overflow && SumDiff >= 0 + // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff APInt MinVal = APInt::getSignedMinValue(BitWidth); - APInt MaxVal = APInt::getSignedMaxValue(BitWidth); SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin); + SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff, + DAG.getConstant(BitWidth - 1, dl, VT)); + Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin); return DAG.getSelect(dl, VT, Overflow, Result, SumDiff); } @@ -8394,7 +8411,7 @@ void TargetLowering::expandSADDSUBO( // If SADDSAT/SSUBSAT is legal, compare results to detect overflow. unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT; - if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) { + if (isOperationLegal(OpcSat, LHS.getValueType())) { SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS); SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE); Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); @@ -8447,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); if (VT.isVector()) - WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT, - VT.getVectorNumElements()); + WideVT = + EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount()); SDValue BottomHalf; SDValue TopHalf; |