diff options
Diffstat (limited to 'lib/CodeGen/SelectionDAG/SelectionDAG.cpp')
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 561 |
1 files changed, 438 insertions, 123 deletions
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e225ba8703b7..003ea5030bfc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -289,28 +289,28 @@ static int isSignedOp(ISD::CondCode Opcode) { } ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, - bool isInteger) { - if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + bool IsInteger) { + if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) // Cannot fold a signed integer setcc with an unsigned integer setcc. return ISD::SETCC_INVALID; unsigned Op = Op1 | Op2; // Combine all of the condition bits. - // If the N and U bits get set then the resultant comparison DOES suddenly - // care about orderedness, and is true when ordered. + // If the N and U bits get set, then the resultant comparison DOES suddenly + // care about orderedness, and it is true when ordered. if (Op > ISD::SETTRUE2) Op &= ~16; // Clear the U bit if the N bit is set. // Canonicalize illegal integer setcc's. - if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT + if (IsInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT Op = ISD::SETNE; return ISD::CondCode(Op); } ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, - bool isInteger) { - if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + bool IsInteger) { + if (IsInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) // Cannot fold a signed setcc with an unsigned setcc. return ISD::SETCC_INVALID; @@ -318,7 +318,7 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, ISD::CondCode Result = ISD::CondCode(Op1 & Op2); // Canonicalize illegal integer setcc's. - if (isInteger) { + if (IsInteger) { switch (Result) { default: break; case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT @@ -871,11 +871,13 @@ SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf) { - MF = &mf; +void SelectionDAG::init(MachineFunction &NewMF, + OptimizationRemarkEmitter &NewORE) { + MF = &NewMF; + ORE = &NewORE; TLI = getSubtarget().getTargetLowering(); TSI = getSubtarget().getSelectionDAGInfo(); - Context = &mf.getFunction()->getContext(); + Context = &MF->getFunction()->getContext(); } SelectionDAG::~SelectionDAG() { @@ -1994,8 +1996,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, /// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows /// us to only collect the known bits that are shared by the requested vector /// elements. -/// TODO: We only support DemandedElts on a few opcodes so far, the remainder -/// should be added when they become necessary. void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, const APInt &DemandedElts, unsigned Depth) const { @@ -2251,10 +2251,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero2.countLeadingOnes(), BitWidth) - BitWidth; - TrailZ = std::min(TrailZ, BitWidth); - LeadZ = std::min(LeadZ, BitWidth); - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | - APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero.clearAllBits(); + KnownZero.setLowBits(std::min(TrailZ, BitWidth)); + KnownZero.setHighBits(std::min(LeadZ, BitWidth)); break; } case ISD::UDIV: { @@ -2272,7 +2271,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); - KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero.setHighBits(LeadZ); break; } case ISD::SELECT: @@ -2297,10 +2296,6 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne &= KnownOne2; KnownZero &= KnownZero2; break; - case ISD::SADDO: - case ISD::UADDO: - case ISD::SSUBO: - case ISD::USUBO: case ISD::SMULO: case ISD::UMULO: if (Op.getResNo() != 1) @@ -2312,14 +2307,14 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + KnownZero.setBitsFrom(1); break; case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + KnownZero.setBitsFrom(1); break; case ISD::SHL: if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) { @@ -2328,7 +2323,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero = KnownZero << *ShAmt; KnownOne = KnownOne << *ShAmt; // Low bits are known zero. - KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt->getZExtValue()); + KnownZero.setLowBits(ShAmt->getZExtValue()); } break; case ISD::SRL: @@ -2338,8 +2333,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZero = KnownZero.lshr(*ShAmt); KnownOne = KnownOne.lshr(*ShAmt); // High bits are known zero. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue()); - KnownZero |= HighBits; + KnownZero.setHighBits(ShAmt->getZExtValue()); } break; case ISD::SRA: @@ -2350,13 +2344,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne.lshr(*ShAmt); // If we know the value of the sign bit, then we know it is copied across // the high bits by the shift amount. - APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue()); APInt SignBit = APInt::getSignBit(BitWidth); SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask. if (KnownZero.intersects(SignBit)) { - KnownZero |= HighBits; // New bits are known zero. + KnownZero.setHighBits(ShAmt->getZExtValue());// New bits are known zero. } else if (KnownOne.intersects(SignBit)) { - KnownOne |= HighBits; // New bits are known one. + KnownOne.setHighBits(ShAmt->getZExtValue()); // New bits are known one. } } break; @@ -2401,9 +2394,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: case ISD::CTPOP: { - unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); - KnownOne.clearAllBits(); + KnownZero.setBitsFrom(Log2_32(BitWidth)+1); break; } case ISD::LOAD: { @@ -2412,26 +2403,39 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarSizeInBits(); - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + KnownZero.setBitsFrom(MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { if (LD->getExtensionType() == ISD::NON_EXTLOAD) computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); } break; } + case ISD::ZERO_EXTEND_VECTOR_INREG: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarSizeInBits(); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, + DemandedElts.zext(InVT.getVectorNumElements()), + Depth + 1); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + KnownZero.setBitsFrom(InBits); + break; + } case ISD::ZERO_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); - APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); KnownOne = KnownOne.trunc(InBits); computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); - KnownZero |= NewBits; + KnownZero.setBitsFrom(InBits); break; } + // TODO ISD::SIGN_EXTEND_VECTOR_INREG case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarSizeInBits(); @@ -2478,10 +2482,21 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } case ISD::FGETSIGN: // All bits are zero except the low bit. - KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); + KnownZero.setBitsFrom(1); break; - - case ISD::SUB: { + case ISD::USUBO: + case ISD::SSUBO: + if (Op.getResNo() == 1) { + // If we know the result of a setcc has the top bits zero, use this info. + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero.setBitsFrom(1); + break; + } + LLVM_FALLTHROUGH; + case ISD::SUB: + case ISD::SUBC: { if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) { // We know that the top bits of C-X are clear if X contains less bits // than C (i.e. no wrap-around can happen). For example, 20-X is @@ -2499,13 +2514,40 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); + KnownZero.setHighBits(NLZ2); } } } - LLVM_FALLTHROUGH; + + // If low bits are know to be zero in both operands, then we know they are + // going to be 0 in the result. Both addition and complement operations + // preserve the low zero bits. + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); + if (KnownZeroLow == 0) + break; + + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + KnownZeroLow = std::min(KnownZeroLow, + KnownZero2.countTrailingOnes()); + KnownZero.setBits(0, KnownZeroLow); + break; } + case ISD::UADDO: + case ISD::SADDO: + if (Op.getResNo() == 1) { + // If we know the result of a setcc has the top bits zero, use this info. + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero.setBitsFrom(1); + break; + } + LLVM_FALLTHROUGH; case ISD::ADD: + case ISD::ADDC: case ISD::ADDE: { // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the @@ -2526,19 +2568,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownZeroLow = std::min(KnownZeroLow, KnownZero2.countTrailingOnes()); - if (Opcode == ISD::ADD) { - KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow); - if (KnownZeroHigh > 1) - KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1); + if (Opcode == ISD::ADDE) { + // With ADDE, a carry bit may be added in, so we can only use this + // information if we know (at least) that the low two bits are clear. + // We then return to the caller that the low bit is unknown but that + // other bits are known zero. + if (KnownZeroLow >= 2) + KnownZero.setBits(1, KnownZeroLow); break; } - // With ADDE, a carry bit may be added in, so we can only use this - // information if we know (at least) that the low two bits are clear. We - // then return to the caller that the low bit is unknown but that other bits - // are known zero. - if (KnownZeroLow >= 2) // ADDE - KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow); + KnownZero.setLowBits(KnownZeroLow); + if (KnownZeroHigh > 1) + KnownZero.setHighBits(KnownZeroHigh - 1); break; } case ISD::SREM: @@ -2591,7 +2633,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes()); KnownOne.clearAllBits(); - KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); + KnownZero.clearAllBits(); + KnownZero.setHighBits(Leaders); break; } case ISD::EXTRACT_ELEMENT: { @@ -2673,6 +2716,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, } break; } + case ISD::BITREVERSE: { + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + KnownZero = KnownZero2.reverseBits(); + KnownOne = KnownOne2.reverseBits(); + break; + } case ISD::BSWAP: { computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, Depth + 1); @@ -2680,12 +2730,62 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, KnownOne = KnownOne2.byteSwap(); break; } - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: + case ISD::ABS: { + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + + // If the source's MSB is zero then we know the rest of the bits already. + if (KnownZero2[BitWidth - 1]) { + KnownZero = KnownZero2; + KnownOne = KnownOne2; + break; + } + + // We only know that the absolute values's MSB will be zero iff there is + // a set bit that isn't the sign bit (otherwise it could be INT_MIN). + KnownOne2.clearBit(BitWidth - 1); + if (KnownOne2.getBoolValue()) { + KnownZero = APInt::getSignBit(BitWidth); + break; + } + break; + } + case ISD::UMIN: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + + // UMIN - we know that the result will have the maximum of the + // known zero leading bits of the inputs. + unsigned LeadZero = KnownZero.countLeadingOnes(); + LeadZero = std::max(LeadZero, KnownZero2.countLeadingOnes()); + + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + KnownZero.setHighBits(LeadZero); + break; + } case ISD::UMAX: { computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, Depth + 1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts, + Depth + 1); + + // UMAX - we know that the result will have the maximum of the + // known one leading bits of the inputs. + unsigned LeadOne = KnownOne.countLeadingOnes(); + LeadOne = std::max(LeadOne, KnownOne2.countLeadingOnes()); + + KnownZero &= KnownZero2; + KnownOne &= KnownOne2; + KnownOne.setHighBits(LeadOne); + break; + } + case ISD::SMIN: + case ISD::SMAX: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts, + Depth + 1); // If we don't know any bits, early out. if (!KnownOne && !KnownZero) break; @@ -2699,7 +2799,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::TargetFrameIndex: if (unsigned Align = InferPtrAlignment(Op)) { // The low bits are known zero if the pointer is aligned. - KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); + KnownZero.setLowBits(Log2_32(Align)); break; } break; @@ -2712,13 +2812,48 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, DemandedElts, + *this, Depth); break; } assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } +SelectionDAG::OverflowKind SelectionDAG::computeOverflowKind(SDValue N0, + SDValue N1) const { + // X + 0 never overflow + if (isNullConstant(N1)) + return OFK_Never; + + APInt N1Zero, N1One; + computeKnownBits(N1, N1Zero, N1One); + if (N1Zero.getBoolValue()) { + APInt N0Zero, N0One; + computeKnownBits(N0, N0Zero, N0One); + + bool overflow; + (~N0Zero).uadd_ov(~N1Zero, overflow); + if (!overflow) + return OFK_Never; + } + + // mulhi + 1 never overflow + if (N0.getOpcode() == ISD::UMUL_LOHI && N0.getResNo() == 1 && + (~N1Zero & 0x01) == ~N1Zero) + return OFK_Never; + + if (N1.getOpcode() == ISD::UMUL_LOHI && N1.getResNo() == 1) { + APInt N0Zero, N0One; + computeKnownBits(N0, N0Zero, N0One); + + if ((~N0Zero & 0x01) == ~N0Zero) + return OFK_Never; + } + + return OFK_Sometime; +} + bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { EVT OpVT = Val.getValueType(); unsigned BitWidth = OpVT.getScalarSizeInBits(); @@ -2745,7 +2880,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { // Are all operands of a build vector constant powers of two? if (Val.getOpcode() == ISD::BUILD_VECTOR) - if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) { + if (llvm::all_of(Val->ops(), [BitWidth](SDValue E) { if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E)) return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2(); return false; @@ -2764,6 +2899,15 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); + APInt DemandedElts = VT.isVector() + ? APInt::getAllOnesValue(VT.getVectorNumElements()) + : APInt(1, 1); + return ComputeNumSignBits(Op, DemandedElts, Depth); +} + +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, + unsigned Depth) const { + EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarSizeInBits(); unsigned Tmp, Tmp2; @@ -2772,6 +2916,9 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { if (Depth == 6) return 1; // Limit search depth. + if (!DemandedElts) + return 1; // No demanded elts, better to assume we don't know anything. + switch (Op.getOpcode()) { default: break; case ISD::AssertSext: @@ -2786,7 +2933,28 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { return Val.getNumSignBits(); } + case ISD::BUILD_VECTOR: + Tmp = VTBits; + for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { + if (!DemandedElts[i]) + continue; + + SDValue SrcOp = Op.getOperand(i); + Tmp2 = ComputeNumSignBits(Op.getOperand(i), Depth + 1); + + // BUILD_VECTOR can implicitly truncate sources, we must handle this. + if (SrcOp.getValueSizeInBits() != VTBits) { + assert(SrcOp.getValueSizeInBits() > VTBits && + "Expected BUILD_VECTOR implicit truncation"); + unsigned ExtraBits = SrcOp.getValueSizeInBits() - VTBits; + Tmp2 = (Tmp2 > ExtraBits ? Tmp2 - ExtraBits : 1); + } + Tmp = std::min(Tmp, Tmp2); + } + return Tmp; + case ISD::SIGN_EXTEND: + case ISD::SIGN_EXTEND_VECTOR_INREG: Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; @@ -2799,7 +2967,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { return std::max(Tmp, Tmp2); case ISD::SRA: - Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); // SRA X, C -> adds C sign bits. if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) { APInt ShiftVal = C->getAPIntValue(); @@ -2887,6 +3055,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { } break; case ISD::ADD: + case ISD::ADDC: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); @@ -2961,19 +3130,63 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { // result. Otherwise it gives either negative or > bitwidth result return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); } + case ISD::INSERT_VECTOR_ELT: { + SDValue InVec = Op.getOperand(0); + SDValue InVal = Op.getOperand(1); + SDValue EltNo = Op.getOperand(2); + unsigned NumElts = InVec.getValueType().getVectorNumElements(); + + ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) { + // If we know the element index, split the demand between the + // source vector and the inserted element. + unsigned EltIdx = CEltNo->getZExtValue(); + + // If we demand the inserted element then get its sign bits. + Tmp = UINT_MAX; + if (DemandedElts[EltIdx]) + Tmp = ComputeNumSignBits(InVal, Depth + 1); + + // If we demand the source vector then get its sign bits, and determine + // the minimum. + APInt VectorElts = DemandedElts; + VectorElts.clearBit(EltIdx); + if (!!VectorElts) { + Tmp2 = ComputeNumSignBits(InVec, VectorElts, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + } else { + // Unknown element index, so ignore DemandedElts and demand them all. + Tmp = ComputeNumSignBits(InVec, Depth + 1); + Tmp2 = ComputeNumSignBits(InVal, Depth + 1); + Tmp = std::min(Tmp, Tmp2); + } + assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); + return Tmp; + } case ISD::EXTRACT_VECTOR_ELT: { - // At the moment we keep this simple and skip tracking the specific - // element. This way we get the lowest common denominator for all elements - // of the vector. - // TODO: get information for given vector element + SDValue InVec = Op.getOperand(0); + SDValue EltNo = Op.getOperand(1); + EVT VecVT = InVec.getValueType(); const unsigned BitWidth = Op.getValueSizeInBits(); const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits(); + const unsigned NumSrcElts = VecVT.getVectorNumElements(); + // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know // anything about sign bits. But if the sizes match we can derive knowledge // about sign bits from the vector operand. - if (BitWidth == EltBitWidth) - return ComputeNumSignBits(Op.getOperand(0), Depth+1); - break; + if (BitWidth != EltBitWidth) + break; + + // If we know the element index, just demand that vector element, else for + // an unknown element index, ignore DemandedElts and demand them all. + APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) + DemandedSrcElts = + APInt::getOneBitSet(NumSrcElts, ConstEltNo->getZExtValue()); + + return ComputeNumSignBits(InVec, DemandedSrcElts, Depth + 1); } case ISD::EXTRACT_SUBVECTOR: return ComputeNumSignBits(Op.getOperand(0), Depth + 1); @@ -3008,14 +3221,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || Op.getOpcode() == ISD::INTRINSIC_VOID) { - unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth); - if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); + unsigned NumBits = + TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); + if (NumBits > 1) + FirstAnswer = std::max(FirstAnswer, NumBits); } // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero, KnownOne; - computeKnownBits(Op, KnownZero, KnownOne, Depth); + computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth); APInt Mask; if (KnownZero.isNegative()) { // sign bit is 0 @@ -3054,6 +3269,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { if (getTarget().Options.NoNaNsFPMath) return true; + if (const BinaryWithFlagsSDNode *BF = dyn_cast<BinaryWithFlagsSDNode>(Op)) + return BF->Flags.hasNoNaNs(); + // If the value is a constant, we can obviously see if it is a NaN or not. if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) return !C->getValueAPF().isNaN(); @@ -3206,6 +3424,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT); break; + case ISD::ABS: + return getConstant(Val.abs(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::BITREVERSE: + return getConstant(Val.reverseBits(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), C->isOpaque()); @@ -3220,6 +3444,17 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), C->isOpaque()); + case ISD::FP16_TO_FP: { + bool Ignored; + APFloat FPV(APFloat::IEEEhalf(), + (Val.getBitWidth() == 16) ? Val : Val.trunc(16)); + + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)FPV.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &Ignored); + return getConstantFP(FPV, DL, VT); + } } } @@ -3261,17 +3496,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: { - integerPart x[2]; bool ignored; - static_assert(integerPartWidth >= 64, "APFloat parts too small!"); + APSInt IntVal(VT.getSizeInBits(), Opcode == ISD::FP_TO_UINT); // FIXME need to be more flexible about rounding mode. - APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), - Opcode==ISD::FP_TO_SINT, - APFloat::rmTowardZero, &ignored); - if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual + APFloat::opStatus s = + V.convertToInteger(IntVal, APFloat::rmTowardZero, &ignored); + if (s == APFloat::opInvalidOp) // inexact is OK, in fact usual break; - APInt api(VT.getSizeInBits(), x); - return getConstant(api, DL, VT); + return getConstant(IntVal, DL, VT); } case ISD::BITCAST: if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) @@ -3281,6 +3513,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); break; + case ISD::FP_TO_FP16: { + bool Ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(APFloat::IEEEhalf(), + APFloat::rmNearestTiesToEven, &Ignored); + return getConstant(V.bitcastToAPInt(), DL, VT); + } } } @@ -3303,6 +3543,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::TRUNCATE: case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: + case ISD::ABS: + case ISD::BITREVERSE: case ISD::BSWAP: case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: @@ -3420,6 +3662,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); break; + case ISD::ABS: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid ABS!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; case ISD::BSWAP: assert(VT.isInteger() && VT == Operand.getValueType() && "Invalid BSWAP!"); @@ -3569,6 +3817,30 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, GA->getOffset() + uint64_t(Offset)); } +bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) { + switch (Opcode) { + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + // If a divisor is zero/undef or any element of a divisor vector is + // zero/undef, the whole op is undef. + assert(Ops.size() == 2 && "Div/rem should have 2 operands"); + SDValue Divisor = Ops[1]; + if (Divisor.isUndef() || isNullConstant(Divisor)) + return true; + + return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) && + any_of(Divisor->op_values(), + [](SDValue V) { return V.isUndef() || isNullConstant(V); }); + // TODO: Handle signed overflow. + } + // TODO: Handle oversized shifts. + default: + return false; + } +} + SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, SDNode *Cst1, SDNode *Cst2) { @@ -3578,6 +3850,9 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); + if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)})) + return getUNDEF(VT); + // Handle the case of two scalars. if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { @@ -3645,6 +3920,9 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, if (Opcode >= ISD::BUILTIN_OP_END) return SDValue(); + if (isUndef(Opcode, Ops)) + return getUNDEF(VT); + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? if (!VT.isVector()) return SDValue(); @@ -3676,7 +3954,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // Find legal integer scalar type for constant promotion and // ensure that its scalar size is at least as large as source. EVT LegalSVT = VT.getScalarType(); - if (LegalSVT.isInteger()) { + if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); if (LegalSVT.bitsLT(VT.getScalarType())) return SDValue(); @@ -3910,35 +4188,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, assert(EVT.bitsLE(VT) && "Not extending!"); if (EVT == VT) return N1; // Not actually extending - auto SignExtendInReg = [&](APInt Val) { + auto SignExtendInReg = [&](APInt Val, llvm::EVT ConstantVT) { unsigned FromBits = EVT.getScalarSizeInBits(); Val <<= Val.getBitWidth() - FromBits; Val = Val.ashr(Val.getBitWidth() - FromBits); - return getConstant(Val, DL, VT.getScalarType()); + return getConstant(Val, DL, ConstantVT); }; if (N1C) { const APInt &Val = N1C->getAPIntValue(); - return SignExtendInReg(Val); + return SignExtendInReg(Val, VT); } if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { SmallVector<SDValue, 8> Ops; + llvm::EVT OpVT = N1.getOperand(0).getValueType(); for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { SDValue Op = N1.getOperand(i); if (Op.isUndef()) { - Ops.push_back(getUNDEF(VT.getScalarType())); + Ops.push_back(getUNDEF(OpVT)); continue; } - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { - APInt Val = C->getAPIntValue(); - Val = Val.zextOrTrunc(VT.getScalarSizeInBits()); - Ops.push_back(SignExtendInReg(Val)); - continue; - } - break; + ConstantSDNode *C = cast<ConstantSDNode>(Op); + APInt Val = C->getAPIntValue(); + Ops.push_back(SignExtendInReg(Val, OpVT)); } - if (Ops.size() == VT.getVectorNumElements()) - return getBuildVector(VT, DL, Ops); + return getBuildVector(VT, DL, Ops); } break; } @@ -4040,6 +4314,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; + // EXTRACT_SUBVECTOR of an UNDEF is an UNDEF. + if (N1.isUndef()) + return getUNDEF(VT); + + // EXTRACT_SUBVECTOR of CONCAT_VECTOR can be simplified if the pieces of + // the concat have the same type as the extract. + if (N2C && N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0 && + VT == N1.getOperand(0).getValueType()) { + unsigned Factor = VT.getVectorNumElements(); + return N1.getOperand(N2C->getZExtValue() / Factor); + } + // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created // during shuffle legalization. if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) && @@ -4943,11 +5230,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, TargetLowering::CallLoweringInfo CLI(*this); CLI.setDebugLoc(dl) .setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), - Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), + Dst.getValueType().getTypeForEVT(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -5004,11 +5291,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, TargetLowering::CallLoweringInfo CLI(*this); CLI.setDebugLoc(dl) .setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), - Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), + Dst.getValueType().getTypeForEVT(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -5066,11 +5353,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, TargetLowering::CallLoweringInfo CLI(*this); CLI.setDebugLoc(dl) .setChain(Chain) - .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Dst.getValueType().getTypeForEVT(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(getDataLayout())), + std::move(Args)) .setDiscardResult() .setTailCall(isTailCall); @@ -7049,6 +7336,21 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const { return Seen; } +/// Return true if the only users of N are contained in Nodes. +bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (llvm::any_of(Nodes, + [&User](const SDNode *Node) { return User == Node; })) + Seen = true; + else + return false; + } + + return Seen; +} + /// isOperand - Return true if this node is an operand of N. /// bool SDValue::isOperandOf(const SDNode *N) const { @@ -7070,21 +7372,39 @@ bool SDNode::isOperandOf(const SDNode *N) const { /// side-effecting instructions on any chain path. In practice, this looks /// through token factors and non-volatile loads. In order to remain efficient, /// this only looks a couple of nodes in, it does not do an exhaustive search. +/// +/// Note that we only need to examine chains when we're searching for +/// side-effects; SelectionDAG requires that all side-effects are represented +/// by chains, even if another operand would force a specific ordering. This +/// constraint is necessary to allow transformations like splitting loads. bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, - unsigned Depth) const { + unsigned Depth) const { if (*this == Dest) return true; // Don't search too deeply, we just want to be able to see through // TokenFactor's etc. if (Depth == 0) return false; - // If this is a token factor, all inputs to the TF happen in parallel. If any - // of the operands of the TF does not reach dest, then we cannot do the xform. + // If this is a token factor, all inputs to the TF happen in parallel. if (getOpcode() == ISD::TokenFactor) { - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) - if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) - return false; - return true; + // First, try a shallow search. + if (is_contained((*this)->ops(), Dest)) { + // We found the chain we want as an operand of this TokenFactor. + // Essentially, we reach the chain without side-effects if we could + // serialize the TokenFactor into a simple chain of operations with + // Dest as the last operation. This is automatically true if the + // chain has one use: there are no other ordering constraints. + // If the chain has more than one use, we give up: some other + // use of Dest might force a side-effect between Dest and the current + // node. + if (Dest.hasOneUse()) + return true; + } + // Next, try a deep search: check whether every operand of the TokenFactor + // reaches Dest. + return all_of((*this)->ops(), [=](SDValue Op) { + return Op.reachesChainWithoutSideEffects(Dest, Depth - 1); + }); } // Loads don't have side effects, look through them. @@ -7102,11 +7422,6 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -uint64_t SDNode::getConstantOperandVal(unsigned Num) const { - assert(Num < NumOperands && "Invalid child # of SDNode!"); - return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); -} - const SDNodeFlags *SDNode::getFlags() const { if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) return &FlagsNode->Flags; @@ -7377,13 +7692,13 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, unsigned BitPos = j * EltBitSize; if (OpVal.isUndef()) - SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); + SplatUndef.setBits(BitPos, BitPos + EltBitSize); else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) - SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). - zextOrTrunc(sz) << BitPos; + SplatValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltBitSize), + BitPos); else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) - SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; - else + SplatValue.insertBits(CN->getValueAPF().bitcastToAPInt(), BitPos); + else return false; } |