aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-03-20 11:40:34 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-05-14 11:43:05 +0000
commit349cc55c9796c4596a5b9904cd3281af295f878f (patch)
tree410c5a785075730a35f1272ca6a7adf72222ad03 /contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
parentcb2ae6163174b90e999326ecec3699ee093a5d43 (diff)
parentc0981da47d5696fe36474fcf86b4ce03ae3ff818 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r--contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp471
1 files changed, 244 insertions, 227 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 7f80ce37e28a..e4a69adff05b 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/Support/DivisionByConstantInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
@@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
TargetLoweringOpt &TLO) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
@@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
}
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
AssumeSingleUse);
@@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumDstEltBits / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / NumDstEltBits;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * NumDstEltBits;
@@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
break;
}
case ISD::INSERT_SUBVECTOR: {
- // If we don't demand the inserted subvector, return the base vector.
SDValue Vec = Op.getOperand(0);
SDValue Sub = Op.getOperand(1);
uint64_t Idx = Op.getConstantOperandVal(2);
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
- if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ // If we don't demand the inserted subvector, return the base vector.
+ if (DemandedSubElts == 0)
return Vec;
+ // If this simply widens the lowest subvector, see if we can do it earlier.
+ if (Idx == 0 && Vec.isUndef()) {
+ if (SDValue NewSub = SimplifyMultipleUseDemandedBits(
+ Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), NewSub, Op.getOperand(2));
+ }
break;
}
case ISD::VECTOR_SHUFFLE: {
@@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
unsigned Depth) const {
EVT VT = Op.getValueType();
APInt DemandedElts = VT.isVector()
- ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ ? APInt::getAllOnes(VT.getVectorNumElements())
: APInt(1, 1);
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
@@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
unsigned Depth) const {
- APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
+ APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits());
return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
Depth);
}
@@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If this is the root being simplified, allow it to have multiple uses,
// just set the DemandedBits/Elts to all bits.
- DemandedBits = APInt::getAllOnesValue(BitWidth);
- DemandedElts = APInt::getAllOnesValue(NumElts);
+ DemandedBits = APInt::getAllOnes(BitWidth);
+ DemandedElts = APInt::getAllOnes(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
KnownBits KnownSub, KnownSrc;
if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
@@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits(
Known = KnownBits::commonBits(Known, KnownSrc);
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() ||
+ !DemandedSrcElts.isAllOnes()) {
SDValue NewSub = SimplifyMultipleUseDemandedBits(
Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
SDValue NewSrc = SimplifyMultipleUseDemandedBits(
@@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use src if we don't need anything from it.
- if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
if (DemandedSrc) {
@@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits(
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If the RHS is a constant, see if we can change it. Don't alter a -1
// constant because that's a 'not' op, and that is better for combining
// and codegen.
- if (!C->isAllOnesValue() &&
- DemandedBits.isSubsetOf(C->getAPIntValue())) {
+ if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) {
// We're flipping all demanded bits. Flip the undemanded bits too.
SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
return TLO.CombineTo(Op, New);
@@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// If we can't turn this into a 'not', try to shrink the constant.
- if (!C || !C->isAllOnesValue())
+ if (!C || !C->isAllOnes())
if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
return true;
@@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits(
// always convert this into a logical shr, even if the shift amount is
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
- if (DemandedBits.isOneValue())
+ if (DemandedBits.isOne())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
if (const APInt *SA =
@@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known.One.setHighBits(ShAmt);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
if (DemandedOp0) {
@@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits(
// If only 1 bit is demanded, replace with PARITY as long as we're before
// op legalization.
// FIXME: Limit to scalars for now.
- if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
+ if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
Op.getOperand(0)));
@@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- unsigned NumSignBits =
- TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
- bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
+ unsigned MinSignedBits =
+ TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1);
+ bool AlreadySignExtended = ExVTBits >= MinSignedBits;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Demand the bits from every vector element without a constant index.
unsigned NumSrcElts = SrcEltCnt.getFixedValue();
- APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
+ APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts);
if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
if (CIdx->getAPIntValue().ult(NumSrcElts))
DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());
@@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcBits.isAllOnesValue() ||
- !DemandedSrcElts.isAllOnesValue()) {
+ if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) {
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
SDValue NewOp =
@@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != Scale; ++i) {
unsigned Offset = i * NumSrcEltBits;
APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue()) {
+ if (!Sub.isZero()) {
DemandedSrcBits |= Sub;
for (unsigned j = 0; j != NumElts; ++j)
if (DemandedElts[j])
@@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = NumSrcEltBits / BitWidth;
unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
- APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
- APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getZero(NumSrcElts);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Offset = (i % Scale) * BitWidth;
@@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) {
SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
@@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits(
// is probably not useful (and could be detrimental).
ConstantSDNode *C = isConstOrConstSplat(Op1);
APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
- if (C && !C->isAllOnesValue() && !C->isOne() &&
- (C->getAPIntValue() | HighMask).isAllOnesValue()) {
+ if (C && !C->isAllOnes() && !C->isOne() &&
+ (C->getAPIntValue() | HighMask).isAllOnes()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
return SDValue();
};
- APInt KnownUndef = APInt::getNullValue(NumElts);
+ APInt KnownUndef = APInt::getZero(NumElts);
for (unsigned i = 0; i != NumElts; ++i) {
// If both inputs for this element are either constant or undef and match
// the element type, compute the constant/undef result for this element of
@@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
- KnownUndef = KnownZero = APInt::getNullValue(NumElts);
+ KnownUndef = KnownZero = APInt::getZero(NumElts);
// TODO: For now we assume we know nothing about scalable vectors.
if (VT.isScalableVector())
@@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
KnownZero, TLO, Depth + 1);
- APInt SrcZero, SrcUndef;
- APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);
+ APInt SrcDemandedElts, SrcZero, SrcUndef;
// Bitcast from 'large element' src vector to 'small element' vector, we
// must demand a source element if any DemandedElt maps to it.
if ((NumElts % NumSrcElts) == 0) {
unsigned Scale = NumElts / NumSrcElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBit(i / Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// TODO - bigendian once we have test coverage.
if (TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
- APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
+ APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits);
for (unsigned i = 0; i != NumElts; ++i)
if (DemandedElts[i]) {
unsigned Ofs = (i % Scale) * EltSizeInBits;
@@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// of this vector.
if ((NumSrcElts % NumElts) == 0) {
unsigned Scale = NumSrcElts / NumElts;
- for (unsigned i = 0; i != NumElts; ++i)
- if (DemandedElts[i])
- SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);
-
+ SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts);
if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
TLO, Depth + 1))
return true;
@@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// the output element will be as well, assuming it was demanded.
for (unsigned i = 0; i != NumElts; ++i) {
if (DemandedElts[i]) {
- if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcZero.extractBits(Scale, i * Scale).isAllOnes())
KnownZero.setBit(i);
- if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
+ if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes())
KnownUndef.setBit(i);
}
}
@@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
case ISD::BUILD_VECTOR: {
// Check all elements and simplify any unused elements with UNDEF.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
// Don't simplify BROADCASTS.
if (llvm::any_of(Op->op_values(),
[&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
@@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
APInt DemandedSrcElts = DemandedElts;
- DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);
+ DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx);
APInt SubUndef, SubZero;
if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
@@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero.insertBits(SubZero, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedSrcElts.isAllOnesValue() ||
- !DemandedSubElts.isAllOnesValue()) {
+ if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
@@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
KnownZero = SrcZero.extractBits(NumElts, Idx);
// Attempt to avoid multi-use ops if we don't need anything from them.
- if (!DemandedElts.isAllOnesValue()) {
+ if (!DemandedElts.isAllOnes()) {
SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
Src, DemandedSrcElts, TLO.DAG, Depth + 1);
if (NewSrc) {
@@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts(
if (DemandedElts.isSubsetOf(KnownUndef))
return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
KnownUndef.clearAllBits();
+
+ // zext - if we just need the bottom element then we can mask:
+ // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and.
+ if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() &&
+ Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits()) {
+ SDLoc DL(Op);
+ EVT SrcVT = Src.getValueType();
+ EVT SrcSVT = SrcVT.getScalarType();
+ SmallVector<SDValue> MaskElts;
+ MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT));
+ MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT));
+ SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts);
+ if (SDValue Fold = TLO.DAG.FoldConstantArithmetic(
+ ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) {
+ Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold);
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold));
+ }
+ }
}
break;
}
@@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Attempt to avoid multi-use ops if we don't need anything from them.
// TODO - use KnownUndef to relax the demandedelts?
- if (!DemandedElts.isAllOnesValue())
+ if (!DemandedElts.isAllOnes())
if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
return true;
break;
@@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
} else {
KnownBits Known;
- APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
+ APInt DemandedBits = APInt::getAllOnes(EltSizeInBits);
if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
TLO, Depth, AssumeSingleUse))
return true;
@@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
case UndefinedBooleanContent:
return CVal[0];
case ZeroOrOneBooleanContent:
- return CVal.isOneValue();
+ return CVal.isOne();
case ZeroOrNegativeOneBooleanContent:
- return CVal.isAllOnesValue();
+ return CVal.isAllOnes();
}
llvm_unreachable("Invalid boolean contents");
@@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
return !CN->getAPIntValue()[0];
- return CN->isNullValue();
+ return CN->isZero();
}
bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
@@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
case TargetLowering::UndefinedBooleanContent:
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return N->isAllOnesValue() && SExt;
+ return N->isAllOnes() && SExt;
}
llvm_unreachable("Unexpected enumeration.");
}
@@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
// Bail out if the compare operand that we want to turn into a zero is
// already a zero (otherwise, infinite loop).
auto *YConst = dyn_cast<ConstantSDNode>(Y);
- if (YConst && YConst->isNullValue())
+ if (YConst && YConst->isZero())
return SDValue();
// Transform this into: ~X & Y == 0.
@@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
DAGCombinerInfo &DCI, const SDLoc &DL) const {
assert(isConstOrConstSplat(N1C) &&
- isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isZero() &&
"Should be a comparison with 0.");
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Valid only for [in]equality comparisons.");
@@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
// equality comparison, then we're just comparing whether X itself is
// zero.
- if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
+ if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
isPowerOf2_32(N0.getScalarValueSizeInBits())) {
if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
@@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isConstFalseVal(N1C) ||
isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
- bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
- (!N1C->isNullValue() && Cond == ISD::SETNE);
+ bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) ||
+ (!N1C->isZero() && Cond == ISD::SETNE);
if (!Inverse)
return TopSetCC;
@@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Otherwise, make this a use of a zext.
return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
- } else if ((N1C->isNullValue() || N1C->isOne()) &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ } else if ((N1C->isZero() || N1C->isOne()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
if (N0.getOpcode() == ISD::SETCC &&
isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
@@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// icmp eq/ne (urem %x, %y), 0
// Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
// icmp eq/ne %x, 0
- if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+ if (N0.getOpcode() == ISD::UREM && N1C->isZero() &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
@@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
}
+ // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0
+ // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0
+ if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 &&
+ N1C && N1C->isAllOnes()) {
+ return DAG.getSetCC(dl, VT, N0.getOperand(0),
+ DAG.getConstant(0, dl, OpVT),
+ Cond == ISD::SETEQ ? ISD::SETLT : ISD::SETGE);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
@@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
- if (C1.isNullValue())
+ if (C1.isZero())
if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
VT, N0, N1, Cond, DCI, dl))
return CC;
@@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// For example, when high 32-bits of i64 X are known clear:
// all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
// all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1
- bool CmpZero = N1C->getAPIntValue().isNullValue();
- bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
+ bool CmpZero = N1C->getAPIntValue().isZero();
+ bool CmpNegOne = N1C->getAPIntValue().isAllOnes();
if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
// Match or(lo,shl(hi,bw/2)) pattern.
auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
@@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
const APInt &AndRHSC = AndRHS->getAPIntValue();
- if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
+ if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) {
unsigned ShiftBits = AndRHSC.countTrailingZeros();
if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
SDValue Shift =
@@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
if (N0.getOpcode() == ISD::UREM) {
if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
return Folded;
@@ -5050,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
SmallVector<SDValue, 16> Shifts, Factors;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
APInt Divisor = C->getAPIntValue();
unsigned Shift = Divisor.countTrailingZeros();
@@ -5152,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
auto BuildSDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
const APInt &Divisor = C->getAPIntValue();
- APInt::ms magics = Divisor.magic();
+ SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor);
int NumeratorFactor = 0;
int ShiftMask = -1;
- if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
+ if (Divisor.isOne() || Divisor.isAllOnes()) {
// If d is +1/-1, we just multiply the numerator by +1/-1.
NumeratorFactor = Divisor.getSExtValue();
- magics.m = 0;
- magics.s = 0;
+ magics.Magic = 0;
+ magics.ShiftAmount = 0;
ShiftMask = 0;
- } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
+ } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) {
// If d > 0 and m < 0, add the numerator.
NumeratorFactor = 1;
- } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
+ } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) {
// If d < 0 and m > 0, subtract the numerator.
NumeratorFactor = -1;
}
- MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
+ MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT));
Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
- Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
+ Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT));
ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
return true;
};
@@ -5297,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
auto BuildUDIVPattern = [&](ConstantSDNode *C) {
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: We should use a narrower constant when the upper
// bits are known to be zero.
const APInt& Divisor = C->getAPIntValue();
- APInt::mu magics = Divisor.magicu();
+ UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor);
unsigned PreShift = 0, PostShift = 0;
// If the divisor is even, we can avoid using the expensive fixup by
// shifting the divided value upfront.
- if (magics.a != 0 && !Divisor[0]) {
+ if (magics.IsAdd != 0 && !Divisor[0]) {
PreShift = Divisor.countTrailingZeros();
// Get magic number for the shifted divisor.
- magics = Divisor.lshr(PreShift).magicu(PreShift);
- assert(magics.a == 0 && "Should use cheap fixup now");
+ magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift);
+ assert(magics.IsAdd == 0 && "Should use cheap fixup now");
}
- APInt Magic = magics.m;
+ APInt Magic = magics.Magic;
unsigned SelNPQ;
- if (magics.a == 0 || Divisor.isOneValue()) {
- assert(magics.s < Divisor.getBitWidth() &&
+ if (magics.IsAdd == 0 || Divisor.isOne()) {
+ assert(magics.ShiftAmount < Divisor.getBitWidth() &&
"We shouldn't generate an undefined shift!");
- PostShift = magics.s;
+ PostShift = magics.ShiftAmount;
SelNPQ = false;
} else {
- PostShift = magics.s - 1;
+ PostShift = magics.ShiftAmount - 1;
SelNPQ = true;
}
@@ -5331,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
NPQFactors.push_back(
DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
- : APInt::getNullValue(EltBits),
+ : APInt::getZero(EltBits),
dl, SVT));
PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
UseNPQ |= SelNPQ;
@@ -5511,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (CDiv->isNullValue())
+ if (CDiv->isZero())
return false;
const APInt &D = CDiv->getAPIntValue();
const APInt &Cmp = CCmp->getAPIntValue();
- ComparingWithAllZeros &= Cmp.isNullValue();
+ ComparingWithAllZeros &= Cmp.isZero();
// x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
// if C2 is not less than C1, the comparison is always false.
@@ -5529,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
// If all lanes are tautological (either all divisors are ones, or divisor
// is not greater than the constant we are comparing with),
// we will prefer to avoid the fold.
- bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
+ bool TautologicalLane = D.isOne() || TautologicalInvertedLane;
HadTautologicalLanes |= TautologicalLane;
AllLanesAreTautological &= TautologicalLane;
// If we are comparing with non-zero, we need'll need to subtract said
// comparison value from the LHS. But there is no point in doing that if
// every lane where we are comparing with non-zero is tautological..
- if (!Cmp.isNullValue())
+ if (!Cmp.isZero())
AllComparisonsWithNonZerosAreTautological &= TautologicalLane;
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
// D is even if it has trailing zeros.
HadEvenDivisor |= (K != 0);
// D is a power-of-two if D0 is one.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5556,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// Q = floor((2^W - 1) u/ D)
// R = ((2^W - 1) u% D)
APInt Q, R;
- APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);
+ APInt::udivrem(APInt::getAllOnes(W), D, Q, R);
// If we are comparing with zero, then that comparison constant is okay,
// else it may need to be one less than that.
if (Cmp.ugt(R))
Q -= 1;
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the lane is tautological the result can be constant-folded.
@@ -5752,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!CompTarget || !CompTarget->isNullValue())
+ if (!CompTarget || !CompTarget->isZero())
return SDValue();
bool HadIntMinDivisor = false;
@@ -5765,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
auto BuildSREMPattern = [&](ConstantSDNode *C) {
// Division by 0 is UB. Leave it to be constant-folded elsewhere.
- if (C->isNullValue())
+ if (C->isZero())
return false;
// FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
@@ -5778,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
HadIntMinDivisor |= D.isMinSignedValue();
// If all divisors are ones, we will prefer to avoid the fold.
- HadOneDivisor |= D.isOneValue();
- AllDivisorsAreOnes &= D.isOneValue();
+ HadOneDivisor |= D.isOne();
+ AllDivisorsAreOnes &= D.isOne();
// Decompose D into D0 * 2^K
unsigned K = D.countTrailingZeros();
- assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate.");
APInt D0 = D.lshr(K);
if (!D.isMinSignedValue()) {
@@ -5794,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// D is a power-of-two if D0 is one. This includes INT_MIN.
// If all divisors are power-of-two, we will prefer to avoid the fold.
- AllDivisorsArePowerOfTwo &= D0.isOneValue();
+ AllDivisorsArePowerOfTwo &= D0.isOne();
// P = inv(D0, 2^W)
// 2^W requires W + 1 bits, so we have to extend and then truncate.
@@ -5802,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
APInt P = D0.zext(W + 1)
.multiplicativeInverse(APInt::getSignedMinValue(W + 1))
.trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ assert(!P.isZero() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOne() && "Multiplicative inverse sanity check.");
// A = floor((2^(W - 1) - 1) / D0) & -2^K
APInt A = APInt::getSignedMaxValue(W).udiv(D0);
@@ -5818,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
// Q = floor((2 * A) / (2^K))
APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
- assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) &&
"We are expecting that A is always less than all-ones for SVT");
- assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) &&
"We are expecting that K is always less than all-ones for ShSVT");
// If the divisor is 1 the result can be constant-folded. Likewise, we
// don't care about INT_MIN lanes, those can be set to undef if appropriate.
- if (D.isOneValue()) {
+ if (D.isOne()) {
// Set P, A and K to a bogus values so we can try to splat them.
P = 0;
A = -1;
@@ -5951,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
SDValue IntMax = DAG.getConstant(
APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
SDValue Zero =
- DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+ DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT);
// Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
@@ -6777,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// the destination signmask can't be represented by the float, so we can
// just use FP_TO_SINT directly.
const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
- APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
+ APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits()));
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
@@ -6970,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return SDValue();
}
-bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+// Only expand vector types if we have the appropriate vector bit operations.
+static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) {
+ assert(VT.isVector() && "Expected vector type");
+ unsigned Len = VT.getScalarSizeInBits();
+ return TLI.isOperationLegalOrCustom(ISD::ADD, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT);
+}
+
+SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -6981,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
// TODO: Add support for irregular type lengths.
if (!(Len <= 128 && Len % 8 == 0))
- return false;
+ return SDValue();
// Only expand vector types if we have the appropriate vector bit operations.
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
- !isOperationLegalOrCustom(ISD::SUB, VT) ||
- !isOperationLegalOrCustom(ISD::SRL, VT) ||
- (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
- !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
- return false;
+ if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
+ return SDValue();
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
@@ -7026,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
DAG.getConstant(Len - 8, dl, ShVT));
- Result = Op;
- return true;
+ return Op;
}
-bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7040,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTLZ, VT)) {
- Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTLZ, VT))
+ return DAG.getNode(ISD::CTLZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
@@ -7052,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
- !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
+ (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SRL, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
- return false;
+ return SDValue();
// for now, we do this:
// x = x | (x >> 1);
@@ -7079,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
}
Op = DAG.getNOT(dl, Op, VT);
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Op);
}
-bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
SDLoc dl(Node);
EVT VT = Node->getValueType(0);
SDValue Op = Node->getOperand(0);
@@ -7092,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If the non-ZERO_UNDEF version is supported we can use that instead.
if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
- isOperationLegalOrCustom(ISD::CTTZ, VT)) {
- Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
- return true;
- }
+ isOperationLegalOrCustom(ISD::CTTZ, VT))
+ return DAG.getNode(ISD::CTTZ, dl, VT, Op);
// If the ZERO_UNDEF version is supported use that and handle the zero case.
if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
@@ -7104,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
SDValue Zero = DAG.getConstant(0, dl, VT);
SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
- Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ return DAG.getSelect(dl, VT, SrcIsZero,
DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
- return true;
}
// Only expand vector types if we have the appropriate vector bit operations.
+ // This includes the operations needed to expand CTPOP if it isn't supported.
if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
(!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
- !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
+ !isOperationLegalOrCustom(ISD::CTLZ, VT) &&
+ !canExpandVectorCTPOP(*this, VT)) ||
!isOperationLegalOrCustom(ISD::SUB, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
// for now, we use: { return popcount(~x & (x - 1)); }
// unless the target has ctlz but not ctpop, in which case we use:
@@ -7128,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
// If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
- Result =
- DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
- DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
- return true;
+ return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
+ DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
}
- Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
- return true;
+ return DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
}
-bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
- SelectionDAG &DAG, bool IsNegative) const {
+SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
+ bool IsNegative) const {
SDLoc dl(N);
EVT VT = N->getValueType(0);
EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7149,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMAX, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMAX, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// abs(x) -> umin(x,sub(0,x))
if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::UMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::UMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// 0 - abs(x) -> smin(x, sub(0,x))
if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
isOperationLegal(ISD::SMIN, VT)) {
SDValue Zero = DAG.getConstant(0, dl, VT);
- Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
- DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
- return true;
+ return DAG.getNode(ISD::SMIN, dl, VT, Op,
+ DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
}
// Only expand vector types if we have the appropriate vector operations.
@@ -7178,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
(!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
(IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
- return false;
+ return SDValue();
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
if (!IsNegative) {
SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
- } else {
- // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
- Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
+ return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
}
- return true;
+
+ // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+ return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
@@ -7266,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
// TODO: We can easily support i4/i2 legal types if any target ever does.
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
- APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
- APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
- APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
- APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
- APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
- APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
+ APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55));
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
- // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
+ // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
+ // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
- // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
- Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
- Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
- Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
+ // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT));
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
return Tmp;
@@ -7803,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
EVT VecVT, const SDLoc &dl,
- unsigned NumSubElts) {
- if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
- return Idx;
+ ElementCount SubEC) {
+ assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) &&
+ "Cannot index a scalable vector within a fixed-width vector");
- EVT IdxVT = Idx.getValueType();
unsigned NElts = VecVT.getVectorMinNumElements();
- if (VecVT.isScalableVector()) {
+ unsigned NumSubElts = SubEC.getKnownMinValue();
+ EVT IdxVT = Idx.getValueType();
+
+ if (VecVT.isScalableVector() && !SubEC.isScalable()) {
// If this is a constant index and we know the value plus the number of the
// elements in the subvector minus one is less than the minimum number of
// elements then it's safe to return Idx.
@@ -7856,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
"Converting bits to bytes lost precision");
-
- // Scalable vectors don't need clamping as these are checked at compile time
- if (SubVecVT.isFixedLengthVector()) {
- assert(SubVecVT.getVectorElementType() == EltVT &&
- "Sub-vector must be a fixed vector with matching element type");
- Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
- SubVecVT.getVectorNumElements());
- }
+ assert(SubVecVT.getVectorElementType() == EltVT &&
+ "Sub-vector must be a vector with matching element type");
+ Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
+ SubVecVT.getVectorElementCount());
EVT IdxVT = Index.getValueType();
+ if (SubVecVT.isScalableVector())
+ Index =
+ DAG.getNode(ISD::MUL, dl, IdxVT, Index,
+ DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1)));
Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
DAG.getConstant(EltSize, dl, IdxVT));
@@ -7921,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDLoc dl(Op);
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isNullValue() && CC == ISD::SETEQ) {
+ if (C->isZero() && CC == ISD::SETEQ) {
EVT VT = Op.getOperand(0).getValueType();
SDValue Zext = Op.getOperand(0);
if (VT.bitsLT(MVT::i32)) {
@@ -7949,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
(IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);
// Scaling is unimportant for bytes, canonicalize to unscaled.
- if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
- IsScaledIndex = false;
- IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
- }
+ if (IsScaledIndex && MemVT.getScalarType() == MVT::i8)
+ return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
return IndexType;
}
@@ -8073,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
}
- // SatMax -> Overflow && SumDiff < 0
- // SatMin -> Overflow && SumDiff >= 0
+ // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
APInt MinVal = APInt::getSignedMinValue(BitWidth);
- APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
+ DAG.getConstant(BitWidth - 1, dl, VT));
+ Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
@@ -8394,7 +8411,7 @@ void TargetLowering::expandSADDSUBO(
// If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
- if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+ if (isOperationLegal(OpcSat, LHS.getValueType())) {
SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
@@ -8447,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
if (VT.isVector())
- WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
- VT.getVectorNumElements());
+ WideVT =
+ EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount());
SDValue BottomHalf;
SDValue TopHalf;