author    | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000
committer | Dimitry Andric <dim@FreeBSD.org> | 2018-07-28 10:51:19 +0000
commit    | eb11fae6d08f479c0799db45860a98af528fa6e7
tree      | 44d492a50c8c1a7eb8e2d17ea3360ec4d066f042 /lib/CodeGen/SelectionDAG/TargetLowering.cpp
parent    | b8a2042aa938069e862750553db0e4d82d25822c
Diffstat (limited to 'lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 918 |
1 file changed, 719 insertions, 199 deletions
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d76e52d78870..fa867fcec366 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFile.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" @@ -32,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <cctype> using namespace llvm; @@ -96,7 +96,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, return true; } -/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// Set CallLoweringInfo attribute flags based on a call instruction /// and called function attributes. void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) { @@ -524,6 +524,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } // Other users may use these bits. + EVT VT = Op.getValueType(); if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { if (Depth != 0) { // If not at the root, Just compute the Known bits to @@ -537,7 +538,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, } else if (DemandedMask == 0) { // Not demanding any bits from Op. if (!Op.isUndef()) - return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); return false; } else if (Depth == 6) { // Limit search depth. return false; @@ -580,7 +581,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownBits LHSKnown; // Do not increment Depth here; that can cause an infinite loop. TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth); - // If the LHS already has zeros where RHSC does, this and is dead. + // If the LHS already has zeros where RHSC does, this 'and' is dead. if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) return TLO.CombineTo(Op, Op0); @@ -596,8 +597,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1 if (isBitwiseNot(Op0) && Op0.hasOneUse() && LHSKnown.One == ~RHSC->getAPIntValue()) { - SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(), - Op0.getOperand(0), Op.getOperand(1)); + SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), + Op.getOperand(1)); return TLO.CombineTo(Op, Xor); } } @@ -618,7 +619,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, Op.getOperand(1)); // If all of the demanded bits in the inputs are known zeros, return zero. if (NewMask.isSubsetOf(Known.Zero | Known2.Zero)) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); // If the RHS is a constant, see if we can simplify it. if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO)) return true; @@ -680,7 +681,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // (but not both) turn this into an *inclusive* or. // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if ((NewMask & ~Known.Zero & ~Known2.Zero) == 0) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op.getOperand(0), Op.getOperand(1))); @@ -696,7 +697,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // NB: it is okay if more bits are known than are requested if (NewMask.isSubsetOf(Known.Zero|Known.One)) { // all known on one side if (Known.One == Known2.One) { // set bits are the same on both sides - EVT VT = Op.getValueType(); SDValue ANDC = TLO.DAG.getConstant(~Known.One & NewMask, dl, VT); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), ANDC)); @@ -710,7 +710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (C && !C->isAllOnesValue()) { if (NewMask.isSubsetOf(C->getAPIntValue())) { // We're flipping all demanded bits. Flip the undemanded bits too. - SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), Op.getValueType()); + SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT); return TLO.CombineTo(Op, New); } // If we can't turn this into a 'not', try to shrink the constant. @@ -761,7 +761,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // width as the setcc result, and (3) the result of a setcc conforms to 0 or // -1, we may be able to bypass the setcc. if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth && - getBooleanContents(Op.getValueType()) == + getBooleanContents(VT) == BooleanContent::ZeroOrNegativeOneBooleanContent) { // If we're testing X < 0, then this compare isn't needed - just use X! // FIXME: We're limiting to integer types here, but this should also work @@ -807,7 +807,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -835,8 +834,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, TLO.DAG.getConstant(ShAmt, dl, ShTy)); return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), - NarrowShl)); + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl)); } // Repeat the SHL optimization above in cases where an extension // intervenes: (shl (anyext (shr x, c1)), c2) to @@ -854,7 +852,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, InnerOp.getOperand(0)); return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, @@ -904,7 +901,6 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); - EVT VT = Op.getValueType(); return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, InOp.getOperand(0), NewSA)); @@ -930,12 +926,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // the shift amount is >= the size of the datatype, which is undefined. if (NewMask.isOneValue()) return TLO.CombineTo(Op, - TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1))); + TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), + Op.getOperand(1))); if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) { - EVT VT = Op.getValueType(); - // If the shift count is an invalid immediate, don't do anything. 
if (SA->getAPIntValue().uge(BitWidth)) break; @@ -1000,14 +994,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!AlreadySignExtended) { // Compute the correct shift amount type, which must be getShiftAmountTy // for scalar types after legalization. - EVT ShiftAmtTy = Op.getValueType(); + EVT ShiftAmtTy = VT; if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), InOp, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp, ShiftAmt)); } } @@ -1072,8 +1065,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If none of the top bits are demanded, convert this into an any_extend. if (NewMask.getActiveBits() <= OperandBitWidth) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op.getOperand(0))); APInt InMask = NewMask.trunc(OperandBitWidth); @@ -1089,8 +1081,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If none of the top bits are demanded, convert this into an any_extend. if (NewMask.getActiveBits() <= InBits) - return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op.getOperand(0))); // Since some of the sign extended bits are demanded, we know that the sign @@ -1107,8 +1098,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // If the sign bit is known zero, convert this to a zero extend. if (Known.isNonNegative()) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, - Op.getValueType(), + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0))); break; } @@ -1139,8 +1129,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. - if (TLO.LegalTypes() && - !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is // undesirable. break; @@ -1150,8 +1139,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, SDValue Shift = In.getOperand(1); if (TLO.LegalTypes()) { uint64_t ShVal = ShAmt->getZExtValue(); - Shift = TLO.DAG.getConstant(ShVal, dl, - getShiftAmountTy(Op.getValueType(), DL)); + Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); } if (ShAmt->getZExtValue() < BitWidth) { @@ -1163,12 +1151,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (!(HighBits & NewMask)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift)); } } @@ -1182,9 +1167,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::AssertZext: { // AssertZext demands all of the high bits, plus any of the low bits // demanded by its users. 
- EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); - APInt InMask = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); + EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits()); if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, Known, TLO, Depth+1)) return true; @@ -1196,40 +1180,45 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!TLO.LegalOperations() && - !Op.getValueType().isVector() && + if (!TLO.LegalOperations() && !VT.isVector() && !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignMask(Op.getValueSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { - bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + if ((OpVTLegal || i32Legal) && VT.isSimple() && + Op.getOperand(0).getValueType() != MVT::f16 && Op.getOperand(0).getValueType() != MVT::f128) { // Cannot eliminate/lower SHL for f128 yet. - EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; + EVT Ty = OpVTLegal ? VT : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. We expect the SHL to be eliminated by other optimizations. SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); unsigned OpVTSizeInBits = Op.getValueSizeInBits(); if (!OpVTLegal && OpVTSizeInBits > 32) - Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign); unsigned ShVal = Op.getValueSizeInBits() - 1; - SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, - Op.getValueType(), - Sign, ShAmt)); + SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt)); } } + // If this is a bitcast, let computeKnownBits handle it. Only do this on a + // recursive call where Known may be useful to the caller. + if (Depth > 0) { + TLO.DAG.computeKnownBits(Op, Known, Depth); + return false; + } break; case ISD::ADD: case ISD::MUL: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that // of the highest bit demanded of them. - APInt LoMask = APInt::getLowBitsSet(BitWidth, - BitWidth - NewMask.countLeadingZeros()); - if (SimplifyDemandedBits(Op.getOperand(0), LoMask, Known2, TLO, Depth+1) || - SimplifyDemandedBits(Op.getOperand(1), LoMask, Known2, TLO, Depth+1) || + SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1); + unsigned NewMaskLZ = NewMask.countLeadingZeros(); + APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ); + if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) || + SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) || // See if the operation should be performed at a smaller bit width. ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) { SDNodeFlags Flags = Op.getNode()->getFlags(); @@ -1238,13 +1227,33 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // won't wrap after simplification. 
Flags.setNoSignedWrap(false); Flags.setNoUnsignedWrap(false); - SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1), + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags); return TLO.CombineTo(Op, NewOp); } return true; } + + // If we have a constant operand, we may be able to turn it into -1 if we + // do not demand the high bits. This can make the constant smaller to + // encode, allow more general folding, or match specialized instruction + // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that + // is probably not useful (and could be detrimental). + ConstantSDNode *C = isConstOrConstSplat(Op1); + APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ); + if (C && !C->isAllOnesValue() && !C->isOne() && + (C->getAPIntValue() | HighMask).isAllOnesValue()) { + SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); + // We can't guarantee that the new math op doesn't wrap, so explicitly + // clear those flags to prevent folding with a potential existing node + // that has those flags set. + SDNodeFlags Flags; + Flags.setNoSignedWrap(false); + Flags.setNoUnsignedWrap(false); + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags); + return TLO.CombineTo(Op, NewOp); + } + LLVM_FALLTHROUGH; } default: @@ -1265,10 +1274,384 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (C->isOpaque()) return false; } - return TLO.CombineTo(Op, - TLO.DAG.getConstant(Known.One, dl, Op.getValueType())); + return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); + } + + return false; +} + +bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, + const APInt &DemandedElts, + APInt &KnownUndef, + APInt &KnownZero, + DAGCombinerInfo &DCI) const { + SelectionDAG &DAG = DCI.DAG; + TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + + bool Simplified = + SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); + if (Simplified) + DCI.CommitTargetLoweringOpt(TLO); + return Simplified; +} + +bool TargetLowering::SimplifyDemandedVectorElts( + SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, + APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth, + bool AssumeSingleUse) const { + EVT VT = Op.getValueType(); + APInt DemandedElts = DemandedEltMask; + unsigned NumElts = DemandedElts.getBitWidth(); + assert(VT.isVector() && "Expected vector op"); + assert(VT.getVectorNumElements() == NumElts && + "Mask size mismatches value type element count!"); + + KnownUndef = KnownZero = APInt::getNullValue(NumElts); + + // Undef operand. + if (Op.isUndef()) { + KnownUndef.setAllBits(); + return false; + } + + // If Op has other users, assume that all elements are needed. + if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) + DemandedElts.setAllBits(); + + // Not demanding any elements from Op. + if (DemandedElts == 0) { + KnownUndef.setAllBits(); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + } + + // Limit search depth. + if (Depth >= 6) + return false; + + SDLoc DL(Op); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + switch (Op.getOpcode()) { + case ISD::SCALAR_TO_VECTOR: { + if (!DemandedElts[0]) { + KnownUndef.setAllBits(); + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); + } + KnownUndef.setHighBits(NumElts - 1); + break; + } + case ISD::BITCAST: { + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // We only handle vectors here. 
+ // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits? + if (!SrcVT.isVector()) + break; + + // Fast handling of 'identity' bitcasts. + unsigned NumSrcElts = SrcVT.getVectorNumElements(); + if (NumSrcElts == NumElts) + return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1); + + APInt SrcZero, SrcUndef; + APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); + + // Bitcast from 'large element' src vector to 'small element' vector, we + // must demand a source element if any DemandedElt maps to it. + if ((NumElts % NumSrcElts) == 0) { + unsigned Scale = NumElts / NumSrcElts; + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SrcDemandedElts.setBit(i / Scale); + + if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, + TLO, Depth + 1)) + return true; + + // If the src element is zero/undef then all the output elements will be - + // only demanded elements are guaranteed to be correct. + for (unsigned i = 0; i != NumSrcElts; ++i) { + if (SrcDemandedElts[i]) { + if (SrcZero[i]) + KnownZero.setBits(i * Scale, (i + 1) * Scale); + if (SrcUndef[i]) + KnownUndef.setBits(i * Scale, (i + 1) * Scale); + } + } + } + + // Bitcast from 'small element' src vector to 'large element' vector, we + // demand all smaller source elements covered by the larger demanded element + // of this vector. + if ((NumSrcElts % NumElts) == 0) { + unsigned Scale = NumSrcElts / NumElts; + for (unsigned i = 0; i != NumElts; ++i) + if (DemandedElts[i]) + SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale); + + if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, + TLO, Depth + 1)) + return true; + + // If all the src elements covering an output element are zero/undef, then + // the output element will be as well, assuming it was demanded. + for (unsigned i = 0; i != NumElts; ++i) { + if (DemandedElts[i]) { + if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue()) + KnownZero.setBit(i); + if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue()) + KnownUndef.setBit(i); + } + } + } + break; + } + case ISD::BUILD_VECTOR: { + // Check all elements and simplify any unused elements with UNDEF. + if (!DemandedElts.isAllOnesValue()) { + // Don't simplify BROADCASTS. 
+ if (llvm::any_of(Op->op_values(), + [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { + SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end()); + bool Updated = false; + for (unsigned i = 0; i != NumElts; ++i) { + if (!DemandedElts[i] && !Ops[i].isUndef()) { + Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType()); + KnownUndef.setBit(i); + Updated = true; + } + } + if (Updated) + return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops)); + } + } + for (unsigned i = 0; i != NumElts; ++i) { + SDValue SrcOp = Op.getOperand(i); + if (SrcOp.isUndef()) { + KnownUndef.setBit(i); + } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() && + (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) { + KnownZero.setBit(i); + } + } + break; + } + case ISD::CONCAT_VECTORS: { + EVT SubVT = Op.getOperand(0).getValueType(); + unsigned NumSubVecs = Op.getNumOperands(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + for (unsigned i = 0; i != NumSubVecs; ++i) { + SDValue SubOp = Op.getOperand(i); + APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); + APInt SubUndef, SubZero; + if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO, + Depth + 1)) + return true; + KnownUndef.insertBits(SubUndef, i * NumSubElts); + KnownZero.insertBits(SubZero, i * NumSubElts); + } + break; + } + case ISD::INSERT_SUBVECTOR: { + if (!isa<ConstantSDNode>(Op.getOperand(2))) + break; + SDValue Base = Op.getOperand(0); + SDValue Sub = Op.getOperand(1); + EVT SubVT = Sub.getValueType(); + unsigned NumSubElts = SubVT.getVectorNumElements(); + const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue(); + if (Idx.uge(NumElts - NumSubElts)) + break; + unsigned SubIdx = Idx.getZExtValue(); + APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx); + APInt SubUndef, SubZero; + if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO, + Depth + 1)) + return true; + APInt BaseElts = DemandedElts; + BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx); + if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO, + Depth + 1)) + return true; + KnownUndef.insertBits(SubUndef, SubIdx); + KnownZero.insertBits(SubZero, SubIdx); + break; + } + case ISD::EXTRACT_SUBVECTOR: { + if (!isa<ConstantSDNode>(Op.getOperand(1))) + break; + SDValue Src = Op.getOperand(0); + unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue(); + if (Idx.uge(NumSrcElts - NumElts)) + break; + // Offset the demanded elts by the subvector index. + uint64_t SubIdx = Idx.getZExtValue(); + APInt SrcElts = DemandedElts.zext(NumSrcElts).shl(SubIdx); + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, + Depth + 1)) + return true; + KnownUndef = SrcUndef.extractBits(NumElts, SubIdx); + KnownZero = SrcZero.extractBits(NumElts, SubIdx); + break; + } + case ISD::INSERT_VECTOR_ELT: { + SDValue Vec = Op.getOperand(0); + SDValue Scl = Op.getOperand(1); + auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); + + // For a legal, constant insertion index, if we don't need this insertion + // then strip it, else remove it from the demanded elts. 
+ if (CIdx && CIdx->getAPIntValue().ult(NumElts)) { + unsigned Idx = CIdx->getZExtValue(); + if (!DemandedElts[Idx]) + return TLO.CombineTo(Op, Vec); + DemandedElts.clearBit(Idx); + + if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + + KnownUndef.clearBit(Idx); + if (Scl.isUndef()) + KnownUndef.setBit(Idx); + + KnownZero.clearBit(Idx); + if (isNullConstant(Scl) || isNullFPConstant(Scl)) + KnownZero.setBit(Idx); + break; + } + + APInt VecUndef, VecZero; + if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO, + Depth + 1)) + return true; + // Without knowing the insertion index we can't set KnownUndef/KnownZero. + break; + } + case ISD::VSELECT: { + APInt DemandedLHS(DemandedElts); + APInt DemandedRHS(DemandedElts); + + // TODO - add support for constant vselect masks. + + // See if we can simplify either vselect operand. + APInt UndefLHS, ZeroLHS; + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + + KnownUndef = UndefLHS & UndefRHS; + KnownZero = ZeroLHS & ZeroRHS; + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); + + // Collect demanded elements from shuffle operands.. + APInt DemandedLHS(NumElts, 0); + APInt DemandedRHS(NumElts, 0); + for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0 || !DemandedElts[i]) + continue; + assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); + if (M < (int)NumElts) + DemandedLHS.setBit(M); + else + DemandedRHS.setBit(M - NumElts); + } + + // See if we can simplify either shuffle operand. + APInt UndefLHS, ZeroLHS; + APInt UndefRHS, ZeroRHS; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS, + ZeroLHS, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS, + ZeroRHS, TLO, Depth + 1)) + return true; + + // Simplify mask using undef elements from LHS/RHS. + bool Updated = false; + bool IdentityLHS = true, IdentityRHS = true; + SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end()); + for (unsigned i = 0; i != NumElts; ++i) { + int &M = NewMask[i]; + if (M < 0) + continue; + if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) || + (M >= (int)NumElts && UndefRHS[M - NumElts])) { + Updated = true; + M = -1; + } + IdentityLHS &= (M < 0) || (M == (int)i); + IdentityRHS &= (M < 0) || ((M - NumElts) == i); + } + + // Update legal shuffle masks based on demanded elements if it won't reduce + // to Identity which can cause premature removal of the shuffle mask. + if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps && + isShuffleMaskLegal(NewMask, VT)) + return TLO.CombineTo(Op, + TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0), + Op.getOperand(1), NewMask)); + + // Propagate undef/zero elements from LHS/RHS. 
+ for (unsigned i = 0; i != NumElts; ++i) { + int M = ShuffleMask[i]; + if (M < 0) { + KnownUndef.setBit(i); + } else if (M < (int)NumElts) { + if (UndefLHS[M]) + KnownUndef.setBit(i); + if (ZeroLHS[M]) + KnownZero.setBit(i); + } else { + if (UndefRHS[M - NumElts]) + KnownUndef.setBit(i); + if (ZeroRHS[M - NumElts]) + KnownZero.setBit(i); + } + } + break; + } + case ISD::ADD: + case ISD::SUB: { + APInt SrcUndef, SrcZero; + if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef, + SrcZero, TLO, Depth + 1)) + return true; + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + KnownZero &= SrcZero; + KnownUndef &= SrcUndef; + break; + } + case ISD::TRUNCATE: + if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef, + KnownZero, TLO, Depth + 1)) + return true; + break; + default: { + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) + if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef, + KnownZero, TLO, Depth)) + return true; + break; + } } + assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero"); return false; } @@ -1316,6 +1699,18 @@ unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, return 1; } +bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode( + SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, + TargetLoweringOpt &TLO, unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use SimplifyDemandedVectorElts if you don't know whether Op" + " is a target node!"); + return false; +} + // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must // work with truncating build vectors and vectors with elements of less than // 8 bits. @@ -1353,16 +1748,6 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { llvm_unreachable("Invalid boolean contents"); } -SDValue TargetLowering::getConstTrueVal(SelectionDAG &DAG, EVT VT, - const SDLoc &DL) const { - unsigned ElementWidth = VT.getScalarSizeInBits(); - APInt TrueInt = - getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent - ? APInt(ElementWidth, 1) - : APInt::getAllOnesValue(ElementWidth); - return DAG.getConstant(TrueInt, DL, VT); -} - bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!N) return false; @@ -1466,6 +1851,89 @@ SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return SDValue(); } +/// There are multiple IR patterns that could be checking whether certain +/// truncation of a signed number would be lossy or not. The pattern which is +/// best at IR level, may not lower optimally. Thus, we want to unfold it. +/// We are looking for the following pattern: (KeptBits is a constant) +/// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) +/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false. +/// KeptBits also can't be 1, that would have been folded to %x dstcond 0 +/// We will unfold it into the natural trunc+sext pattern: +/// ((%x << C) a>> C) dstcond %x +/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x) +SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck( + EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, + const SDLoc &DL) const { + // We must be comparing with a constant. 
+ ConstantSDNode *C1; + if (!(C1 = dyn_cast<ConstantSDNode>(N1))) + return SDValue(); + + // N0 should be: add %x, (1 << (KeptBits-1)) + if (N0->getOpcode() != ISD::ADD) + return SDValue(); + + // And we must be 'add'ing a constant. + ConstantSDNode *C01; + if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1)))) + return SDValue(); + + SDValue X = N0->getOperand(0); + EVT XVT = X.getValueType(); + + // Validate constants ... + + APInt I1 = C1->getAPIntValue(); + + ISD::CondCode NewCond; + if (Cond == ISD::CondCode::SETULT) { + NewCond = ISD::CondCode::SETEQ; + } else if (Cond == ISD::CondCode::SETULE) { + NewCond = ISD::CondCode::SETEQ; + // But need to 'canonicalize' the constant. + I1 += 1; + } else if (Cond == ISD::CondCode::SETUGT) { + NewCond = ISD::CondCode::SETNE; + // But need to 'canonicalize' the constant. + I1 += 1; + } else if (Cond == ISD::CondCode::SETUGE) { + NewCond = ISD::CondCode::SETNE; + } else + return SDValue(); + + const APInt &I01 = C01->getAPIntValue(); + // Both of them must be power-of-two, and the constant from setcc is bigger. + if (!(I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2())) + return SDValue(); + + // They are power-of-two, so which bit is set? + const unsigned KeptBits = I1.logBase2(); + const unsigned KeptBitsMinusOne = I01.logBase2(); + + // Magic! + if (KeptBits != (KeptBitsMinusOne + 1)) + return SDValue(); + assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable"); + + // We don't want to do this in every single case. + SelectionDAG &DAG = DCI.DAG; + if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck( + XVT, KeptBits)) + return SDValue(); + + const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits; + assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable"); + + // Unfold into: ((%x << C) a>> C) cond %x + // Where 'cond' will be either 'eq' or 'ne'. + SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT); + SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt); + SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond); + + return T2; +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -1473,25 +1941,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAGCombinerInfo &DCI, const SDLoc &dl) const { SelectionDAG &DAG = DCI.DAG; + EVT OpVT = N0.getValueType(); // These setcc operations always fold. switch (Cond) { default: break; case ISD::SETFALSE: - case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT); + case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT); case ISD::SETTRUE: - case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = - getBooleanContents(N0->getValueType(0)); - return DAG.getConstant( - Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, - VT); - } + case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT); } // Ensure that the constant occurs on the RHS and fold constant comparisons. + // TODO: Handle non-splat vector constants. All undef causes trouble. 
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); - if (isa<ConstantSDNode>(N0.getNode()) && + if (isConstOrConstSplat(N0) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -1737,7 +2201,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = N0.getOperand(0).getValueType(); if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && - getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { + isCondCodeLegal(Cond, newVT.getSimpleVT()))) { EVT NewSetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); @@ -1867,8 +2331,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } + if (SDValue V = + optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) + return V; + } + + // These simplifications apply to splat vectors as well. + // TODO: Handle more splat vector cases. + if (auto *N1C = isConstOrConstSplat(N1)) { + const APInt &C1 = N1C->getAPIntValue(); + APInt MinVal, MaxVal; - unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits(); if (ISD::isSignedIntSetCC(Cond)) { MinVal = APInt::getSignedMinValue(OperandBitSize); MaxVal = APInt::getSignedMaxValue(OperandBitSize); @@ -1881,84 +2355,105 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { // X >= MIN --> true if (C1 == MinVal) - return DAG.getConstant(1, dl, VT); - - // X >= C0 --> X > (C0 - 1) - APInt C = C1 - 1; - ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; - if ((DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, dl, N1.getValueType()), - NewCC); + return DAG.getBoolConstant(true, dl, VT, OpVT); + + if (!VT.isVector()) { // TODO: Support this for vectors. + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } } } if (Cond == ISD::SETLE || Cond == ISD::SETULE) { // X <= MAX --> true if (C1 == MaxVal) - return DAG.getConstant(1, dl, VT); + return DAG.getBoolConstant(true, dl, VT, OpVT); // X <= C0 --> X < (C0 + 1) - APInt C = C1 + 1; - ISD::CondCode NewCC = (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT; - if ((DCI.isBeforeLegalizeOps() || - isCondCodeLegal(NewCC, VT.getSimpleVT())) && - (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && - isLegalICmpImmediate(C.getSExtValue())))) { - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(C, dl, N1.getValueType()), - NewCC); - } - } - - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) - return DAG.getConstant(0, dl, VT); // X < MIN --> false - if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) - return DAG.getConstant(1, dl, VT); // X >= MIN --> true - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) - return DAG.getConstant(0, dl, VT); // X > MAX --> false - if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) - return DAG.getConstant(1, dl, VT); // X <= MAX --> true - - // Canonicalize setgt X, Min --> setne X, Min - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - // Canonicalize setlt X, Max --> setne X, Max - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) - return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); - - // If we have setult X, 1, turn it into seteq X, 0 - if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MinVal, dl, N0.getValueType()), - ISD::SETEQ); - // If we have setugt X, Max-1, turn it into seteq X, Max - if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(MaxVal, dl, N0.getValueType()), - ISD::SETEQ); + if (!VT.isVector()) { // TODO: Support this for vectors. + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } + } + } - // If we have "setcc X, C0", check to see if we can shrink the immediate - // by changing cc. + if (Cond == ISD::SETLT || Cond == ISD::SETULT) { + if (C1 == MinVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false + + // TODO: Support this for vectors after legalize ops. + if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // Canonicalize setlt X, Max --> setne X, Max + if (C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if (C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } - // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) - return DAG.getSetCC(dl, VT, N0, - DAG.getConstant(0, dl, N1.getValueType()), - ISD::SETLT); + if (Cond == ISD::SETGT || Cond == ISD::SETUGT) { + if (C1 == MaxVal) + return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false + + // TODO: Support this for vectors after legalize ops. 
+ if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // Canonicalize setgt X, Min --> setne X, Min + if (C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setugt X, Max-1, turn it into seteq X, Max + if (C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, dl, N0.getValueType()), + ISD::SETEQ); + } + } - // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + // TODO: Support this for vectors after legalize ops. + if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, dl, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } } + } + + // Back to non-vector simplifications. + // TODO: Can we do these for vector splats? + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && @@ -1967,9 +2462,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND) { auto &DL = DAG.getDataLayout(); if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { - EVT ShiftTy = DCI.isBeforeLegalize() - ? getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. if (AndRHS->getAPIntValue().isPowerOf2()) { @@ -2001,9 +2495,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); auto &DL = DAG.getDataLayout(); - EVT ShiftTy = DCI.isBeforeLegalize() - ? getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), DAG.getConstant(ShiftBits, dl, @@ -2033,9 +2526,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (ShiftBits && NewC.getMinSignedBits() <= 64 && isLegalICmpImmediate(NewC.getSExtValue())) { auto &DL = DAG.getDataLayout(); - EVT ShiftTy = DCI.isBeforeLegalize() - ? getPointerTy(DL) - : getShiftAmountTy(N0.getValueType(), DL); + EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()); EVT CmpTy = N0.getValueType(); SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, DAG.getConstant(ShiftBits, dl, ShiftTy)); @@ -2058,9 +2550,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, switch (ISD::getUnorderedFlavor(Cond)) { default: llvm_unreachable("Unknown flavor!"); case 0: // Known false. 
- return DAG.getConstant(0, dl, VT); + return DAG.getBoolConstant(false, dl, VT, OpVT); case 1: // Known true. - return DAG.getConstant(1, dl, VT); + return DAG.getBoolConstant(true, dl, VT, OpVT); case 2: // Undefined. return DAG.getUNDEF(VT); } @@ -2124,31 +2616,24 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N0 == N1) { // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. - uint64_t EqVal = 0; - switch (getBooleanContents(N0.getValueType())) { - case UndefinedBooleanContent: - case ZeroOrOneBooleanContent: - EqVal = ISD::isTrueWhenEqual(Cond); - break; - case ZeroOrNegativeOneBooleanContent: - EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0; - break; - } + + bool EqTrue = ISD::isTrueWhenEqual(Cond); // We can always fold X == X for integer setcc's. - if (N0.getValueType().isInteger()) { - return DAG.getConstant(EqVal, dl, VT); - } + if (N0.getValueType().isInteger()) + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); + unsigned UOF = ISD::getUnorderedFlavor(Cond); if (UOF == 2) // FP operators that are undefined on NaNs. - return DAG.getConstant(EqVal, dl, VT); - if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) - return DAG.getConstant(EqVal, dl, VT); + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); + if (UOF == unsigned(EqTrue)) + return DAG.getBoolConstant(EqTrue, dl, VT, OpVT); // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; - if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || - getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal)) + if (NewCond != Cond && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCond, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } @@ -2237,7 +2722,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue SH = DAG.getNode( ISD::SHL, dl, N1.getValueType(), N1, DAG.getConstant(1, dl, - getShiftAmountTy(N1.getValueType(), DL))); + getShiftAmountTy(N1.getValueType(), DL, + !DCI.isBeforeLegalize()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -2262,7 +2748,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode( ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL))); + DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL, + !DCI.isBeforeLegalize()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -2276,50 +2763,52 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Fold away ALL boolean setcc's. 
SDValue Temp; - if (N0.getValueType() == MVT::i1 && foldBooleans) { + if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) { + EVT OpVT = N0.getValueType(); switch (Cond) { default: llvm_unreachable("Unknown integer setcc!"); case ISD::SETEQ: // X == Y -> ~(X^Y) - Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); - N0 = DAG.getNOT(dl, Temp, MVT::i1); + Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); + N0 = DAG.getNOT(dl, Temp, OpVT); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETNE: // X != Y --> (X^Y) - N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1); break; case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y - Temp = DAG.getNOT(dl, N0, MVT::i1); - N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + Temp = DAG.getNOT(dl, N0, OpVT); + N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X - Temp = DAG.getNOT(dl, N1, MVT::i1); - N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + Temp = DAG.getNOT(dl, N1, OpVT); + N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y - Temp = DAG.getNOT(dl, N0, MVT::i1); - N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + Temp = DAG.getNOT(dl, N0, OpVT); + N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(Temp.getNode()); break; case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X - Temp = DAG.getNOT(dl, N1, MVT::i1); - N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + Temp = DAG.getNOT(dl, N1, OpVT); + N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp); break; } - if (VT != MVT::i1) { + if (VT.getScalarType() != MVT::i1) { if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(N0.getNode()); // FIXME: If running after legalize, we probably can't do this. - N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT)); + N0 = DAG.getNode(ExtendCode, dl, VT, N0); } return N0; } @@ -2928,7 +3417,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } } -/// \brief Given an exact SDIV by a constant, create a multiplication +/// Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, const SDLoc &dl, SelectionDAG &DAG, @@ -2970,7 +3459,7 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, return SDValue(); } -/// \brief Given an ISD::SDIV node expressing a divide by constant, +/// Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". 
@@ -3034,7 +3523,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, return DAG.getNode(ISD::ADD, dl, VT, Q, T); } -/// \brief Given an ISD::UDIV node expressing a divide by constant, +/// Given an ISD::UDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". @@ -3413,9 +3902,6 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, return DAG.getMergeValues({ Value, NewChain }, SL); } -// FIXME: This relies on each element having a byte size, otherwise the stride -// is 0 and just overwrites the same location. ExpandStore currently expects -// this broken behavior. SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const { SDLoc SL(ST); @@ -3432,11 +3918,43 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, // The type of data as saved in memory. EVT MemSclVT = StVT.getScalarType(); - // Store Stride in bytes - unsigned Stride = MemSclVT.getSizeInBits() / 8; EVT IdxVT = getVectorIdxTy(DAG.getDataLayout()); unsigned NumElem = StVT.getVectorNumElements(); + // A vector must always be stored in memory as-is, i.e. without any padding + // between the elements, since various code depend on it, e.g. in the + // handling of a bitcast of a vector type to int, which may be done with a + // vector store followed by an integer load. A vector that does not have + // elements that are byte-sized must therefore be stored as an integer + // built out of the extracted vector elements. + if (!MemSclVT.isByteSized()) { + unsigned NumBits = StVT.getSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + + SDValue CurrVal = DAG.getConstant(0, SL, IntVT); + + for (unsigned Idx = 0; Idx < NumElem; ++Idx) { + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, + DAG.getConstant(Idx, SL, IdxVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt); + SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc); + unsigned ShiftIntoIdx = + (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx); + SDValue ShiftAmount = + DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT); + SDValue ShiftedElt = + DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount); + CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt); + } + + return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), + ST->getAlignment(), ST->getMemOperand()->getFlags(), + ST->getAAInfo()); + } + + // Store Stride in bytes + unsigned Stride = MemSclVT.getSizeInBits() / 8; + assert (Stride && "Zero stride!"); // Extract each of the elements from the original vector and save them into // memory individually. SmallVector<SDValue, 8> Stores; @@ -3475,6 +3993,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { if (!isOperationLegalOrCustom(ISD::LOAD, intVT)) { // Scalarize the load and let the individual components be handled. SDValue Scalarized = scalarizeVectorLoad(LD, DAG); + if (Scalarized->getOpcode() == ISD::MERGE_VALUES) + return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1)); return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1)); } |