diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 888 |
1 files changed, 776 insertions, 112 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f6d1fa87676f..a6b471ea22b7 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -13,13 +13,13 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetRegisterInfo.h" -#include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalVariable.h" @@ -30,7 +30,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include <cctype> using namespace llvm; @@ -94,6 +93,8 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, // (We look for a CopyFromReg reading a virtual register that is used // for the function live-in value of register Reg) SDValue Value = OutVals[I]; + if (Value->getOpcode() == ISD::AssertZext) + Value = Value.getOperand(0); if (Value->getOpcode() != ISD::CopyFromReg) return false; Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); @@ -121,7 +122,7 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError); Alignment = Call->getParamStackAlign(ArgIdx); IndirectType = nullptr; - assert(IsByVal + IsPreallocated + IsInAlloca <= 1 && + assert(IsByVal + IsPreallocated + IsInAlloca + IsSRet <= 1 && "multiple ABI attributes?"); if (IsByVal) { IndirectType = Call->getParamByValType(ArgIdx); @@ -132,6 +133,8 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call, IndirectType = Call->getParamPreallocatedType(ArgIdx); if (IsInAlloca) IndirectType = Call->getParamInAllocaType(ArgIdx); + if (IsSRet) + IndirectType = Call->getParamStructRetType(ArgIdx); } /// Generate a libcall taking the given operands as arguments and returning a @@ -193,7 +196,8 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, bool TargetLowering::findOptimalMemOpLowering( std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const { - if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign()) + if (Limit != ~unsigned(0) && Op.isMemcpyWithFixedDstAlign() && + Op.getSrcAlign() < Op.getDstAlign()) return false; EVT VT = getOptimalMemOpType(Op, FuncAttributes); @@ -905,6 +909,132 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( Depth); } +// Attempt to form ext(avgfloor(A, B)) from shr(add(ext(A), ext(B)), 1). +// or to form ext(avgceil(A, B)) from shr(add(ext(A), ext(B), 1), 1). +static SDValue combineShiftToAVG(SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI, + const APInt &DemandedBits, + const APInt &DemandedElts, + unsigned Depth) { + assert((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SRA) && + "SRL or SRA node is required here!"); + // Is the right shift using an immediate value of 1? + ConstantSDNode *N1C = isConstOrConstSplat(Op.getOperand(1), DemandedElts); + if (!N1C || !N1C->isOne()) + return SDValue(); + + // We are looking for an avgfloor + // add(ext, ext) + // or one of these as a avgceil + // add(add(ext, ext), 1) + // add(add(ext, 1), ext) + // add(ext, add(ext, 1)) + SDValue Add = Op.getOperand(0); + if (Add.getOpcode() != ISD::ADD) + return SDValue(); + + SDValue ExtOpA = Add.getOperand(0); + SDValue ExtOpB = Add.getOperand(1); + auto MatchOperands = [&](SDValue Op1, SDValue Op2, SDValue Op3) { + ConstantSDNode *ConstOp; + if ((ConstOp = isConstOrConstSplat(Op1, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op2; + ExtOpB = Op3; + return true; + } + if ((ConstOp = isConstOrConstSplat(Op2, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op1; + ExtOpB = Op3; + return true; + } + if ((ConstOp = isConstOrConstSplat(Op3, DemandedElts)) && + ConstOp->isOne()) { + ExtOpA = Op1; + ExtOpB = Op2; + return true; + } + return false; + }; + bool IsCeil = + (ExtOpA.getOpcode() == ISD::ADD && + MatchOperands(ExtOpA.getOperand(0), ExtOpA.getOperand(1), ExtOpB)) || + (ExtOpB.getOpcode() == ISD::ADD && + MatchOperands(ExtOpB.getOperand(0), ExtOpB.getOperand(1), ExtOpA)); + + // If the shift is signed (sra): + // - Needs >= 2 sign bit for both operands. + // - Needs >= 2 zero bits. + // If the shift is unsigned (srl): + // - Needs >= 1 zero bit for both operands. + // - Needs 1 demanded bit zero and >= 2 sign bits. + unsigned ShiftOpc = Op.getOpcode(); + bool IsSigned = false; + unsigned KnownBits; + unsigned NumSignedA = DAG.ComputeNumSignBits(ExtOpA, DemandedElts, Depth); + unsigned NumSignedB = DAG.ComputeNumSignBits(ExtOpB, DemandedElts, Depth); + unsigned NumSigned = std::min(NumSignedA, NumSignedB) - 1; + unsigned NumZeroA = + DAG.computeKnownBits(ExtOpA, DemandedElts, Depth).countMinLeadingZeros(); + unsigned NumZeroB = + DAG.computeKnownBits(ExtOpB, DemandedElts, Depth).countMinLeadingZeros(); + unsigned NumZero = std::min(NumZeroA, NumZeroB); + + switch (ShiftOpc) { + default: + llvm_unreachable("Unexpected ShiftOpc in combineShiftToAVG"); + case ISD::SRA: { + if (NumZero >= 2 && NumSigned < NumZero) { + IsSigned = false; + KnownBits = NumZero; + break; + } + if (NumSigned >= 1) { + IsSigned = true; + KnownBits = NumSigned; + break; + } + return SDValue(); + } + case ISD::SRL: { + if (NumZero >= 1 && NumSigned < NumZero) { + IsSigned = false; + KnownBits = NumZero; + break; + } + if (NumSigned >= 1 && DemandedBits.isSignBitClear()) { + IsSigned = true; + KnownBits = NumSigned; + break; + } + return SDValue(); + } + } + + unsigned AVGOpc = IsCeil ? (IsSigned ? ISD::AVGCEILS : ISD::AVGCEILU) + : (IsSigned ? ISD::AVGFLOORS : ISD::AVGFLOORU); + + // Find the smallest power-2 type that is legal for this vector size and + // operation, given the original type size and the number of known sign/zero + // bits. + EVT VT = Op.getValueType(); + unsigned MinWidth = + std::max<unsigned>(VT.getScalarSizeInBits() - KnownBits, 8); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), PowerOf2Ceil(MinWidth)); + if (VT.isVector()) + NVT = EVT::getVectorVT(*DAG.getContext(), NVT, VT.getVectorElementCount()); + if (!TLI.isOperationLegalOrCustom(AVGOpc, NVT)) + return SDValue(); + + SDLoc DL(Op); + SDValue ResultAVG = + DAG.getNode(AVGOpc, DL, NVT, DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpA), + DAG.getNode(ISD::TRUNCATE, DL, NVT, ExtOpB)); + return DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL, VT, + ResultAVG); +} + /// Look at Op. At this point, we know that only the OriginalDemandedBits of the /// result of Op are ever used downstream. If we can use this information to /// simplify Op, create a new simplified DAG node and return true, returning the @@ -989,7 +1119,7 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits SrcKnown; SDValue Src = Op.getOperand(0); unsigned SrcBitWidth = Src.getScalarValueSizeInBits(); - APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth); + APInt SrcDemandedBits = DemandedBits.zext(SrcBitWidth); if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1)) return true; @@ -1105,7 +1235,7 @@ bool TargetLowering::SimplifyDemandedBits( break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, Known, TLO, Depth + 1)) @@ -1409,6 +1539,19 @@ bool TargetLowering::SimplifyDemandedBits( // Only known if known in both the LHS and RHS. Known = KnownBits::commonBits(Known, Known2); break; + case ISD::VSELECT: + if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, DemandedElts, + Known, TLO, Depth + 1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedElts, + Known2, TLO, Depth + 1)) + return true; + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); + + // Only known if known in both the LHS and RHS. + Known = KnownBits::commonBits(Known, Known2); + break; case ISD::SELECT_CC: if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO, Depth + 1)) @@ -1542,6 +1685,16 @@ bool TargetLowering::SimplifyDemandedBits( // low bits known zero. Known.Zero.setLowBits(ShAmt); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0) { + SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // Try shrinking the operation as long as the shift amount will still be // in range. if ((ShAmt < DemandedBits.getActiveBits()) && @@ -1567,6 +1720,11 @@ bool TargetLowering::SimplifyDemandedBits( SDValue Op1 = Op.getOperand(1); EVT ShiftVT = Op1.getValueType(); + // Try to match AVG patterns. + if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits, + DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, AVG); + if (const APInt *SA = TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); @@ -1633,6 +1791,11 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); + // Try to match AVG patterns. + if (SDValue AVG = combineShiftToAVG(Op, TLO.DAG, *this, DemandedBits, + DemandedElts, Depth + 1)) + return TLO.CombineTo(Op, AVG); + if (const APInt *SA = TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) { unsigned ShAmt = SA->getZExtValue(); @@ -1727,6 +1890,22 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt); Known.One |= Known2.One; Known.Zero |= Known2.Zero; + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!Demanded0.isAllOnes() || !Demanded1.isAllOnes() || + !DemandedElts.isAllOnes()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, Demanded0, DemandedElts, TLO.DAG, Depth + 1); + SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( + Op1, Demanded1, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0 || DemandedOp1) { + DemandedOp0 = DemandedOp0 ? DemandedOp0 : Op0; + DemandedOp1 = DemandedOp1 ? DemandedOp1 : Op1; + SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedOp0, + DemandedOp1, Op2); + return TLO.CombineTo(Op, NewOp); + } + } } // For pow-2 bitwidths we only demand the bottom modulo amt bits. @@ -1899,7 +2078,8 @@ bool TargetLowering::SimplifyDemandedBits( // bit is demanded. InputDemandedBits.setBit(ExVTBits - 1); - if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Op0, InputDemandedBits, DemandedElts, Known, TLO, + Depth + 1)) return true; assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -1965,7 +2145,7 @@ bool TargetLowering::SimplifyDemandedBits( } APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; @@ -2002,7 +2182,7 @@ bool TargetLowering::SimplifyDemandedBits( } APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); // Since some of the sign extended bits are demanded, we know that the sign // bit is demanded. @@ -2046,7 +2226,7 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); APInt InDemandedBits = DemandedBits.trunc(InBits); - APInt InDemandedElts = DemandedElts.zextOrSelf(InElts); + APInt InDemandedElts = DemandedElts.zext(InElts); if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO, Depth + 1)) return true; @@ -2265,9 +2445,27 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::MUL: - // 'Quadratic Reciprocity': mul(x,x) -> 0 if we're only demanding bit[1] - if (DemandedBits == 2 && Op.getOperand(0) == Op.getOperand(1)) - return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT)); + if (DemandedBits.isPowerOf2()) { + // The LSB of X*Y is set only if (X & 1) == 1 and (Y & 1) == 1. + // If we demand exactly one bit N and we have "X * (C' << N)" where C' is + // odd (has LSB set), then the left-shifted low bit of X is the answer. + unsigned CTZ = DemandedBits.countTrailingZeros(); + ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1), DemandedElts); + if (C && C->getAPIntValue().countTrailingZeros() == CTZ) { + EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); + SDValue AmtC = TLO.DAG.getConstant(CTZ, dl, ShiftAmtTy); + SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, Op.getOperand(0), AmtC); + return TLO.CombineTo(Op, Shl); + } + } + // For a squared value "X * X", the bottom 2 bits are 0 and X[0] because: + // X * X is odd iff X is odd. + // 'Quadratic Reciprocity': X * X -> 0 for bit[1] + if (Op.getOperand(0) == Op.getOperand(1) && DemandedBits.ult(4)) { + SDValue One = TLO.DAG.getConstant(1, dl, VT); + SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One); + return TLO.CombineTo(Op, And1); + } LLVM_FALLTHROUGH; case ISD::ADD: case ISD::SUB: { @@ -2330,6 +2528,49 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, NewOp); } + // Match a multiply with a disguised negated-power-of-2 and convert to a + // an equivalent shift-left amount. + // Example: (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) + auto getShiftLeftAmt = [&HighMask](SDValue Mul) -> unsigned { + if (Mul.getOpcode() != ISD::MUL || !Mul.hasOneUse()) + return 0; + + // Don't touch opaque constants. Also, ignore zero and power-of-2 + // multiplies. Those will get folded later. + ConstantSDNode *MulC = isConstOrConstSplat(Mul.getOperand(1)); + if (MulC && !MulC->isOpaque() && !MulC->isZero() && + !MulC->getAPIntValue().isPowerOf2()) { + APInt UnmaskedC = MulC->getAPIntValue() | HighMask; + if (UnmaskedC.isNegatedPowerOf2()) + return (-UnmaskedC).logBase2(); + } + return 0; + }; + + auto foldMul = [&](ISD::NodeType NT, SDValue X, SDValue Y, unsigned ShlAmt) { + EVT ShiftAmtTy = getShiftAmountTy(VT, TLO.DAG.getDataLayout()); + SDValue ShlAmtC = TLO.DAG.getConstant(ShlAmt, dl, ShiftAmtTy); + SDValue Shl = TLO.DAG.getNode(ISD::SHL, dl, VT, X, ShlAmtC); + SDValue Res = TLO.DAG.getNode(NT, dl, VT, Y, Shl); + return TLO.CombineTo(Op, Res); + }; + + if (isOperationLegalOrCustom(ISD::SHL, VT)) { + if (Op.getOpcode() == ISD::ADD) { + // (X * MulC) + Op1 --> Op1 - (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op0)) + return foldMul(ISD::SUB, Op0.getOperand(0), Op1, ShAmt); + // Op0 + (X * MulC) --> Op0 - (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op1)) + return foldMul(ISD::SUB, Op1.getOperand(0), Op0, ShAmt); + } + if (Op.getOpcode() == ISD::SUB) { + // Op0 - (X * MulC) --> Op0 + (X << log2(-MulC)) + if (unsigned ShAmt = getShiftLeftAmt(Op1)) + return foldMul(ISD::ADD, Op1.getOperand(0), Op0, ShAmt); + } + } + LLVM_FALLTHROUGH; } default: @@ -2347,7 +2588,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we know the value of all of the demanded bits, return this as a // constant. - if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) { + if (!isTargetCanonicalConstantNode(Op) && + DemandedBits.isSubsetOf(Known.Zero | Known.One)) { // Avoid folding to a constant if any OpaqueConstant is involved. const SDNode *N = Op.getNode(); for (SDNode *Op : @@ -2370,13 +2612,12 @@ bool TargetLowering::SimplifyDemandedBits( bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, - APInt &KnownUndef, - APInt &KnownZero, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), !DCI.isBeforeLegalizeOps()); + APInt KnownUndef, KnownZero; bool Simplified = SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO); if (Simplified) { @@ -2447,6 +2688,10 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef = KnownZero = APInt::getZero(NumElts); + const TargetLowering &TLI = TLO.DAG.getTargetLoweringInfo(); + if (!TLI.shouldSimplifyDemandedVectorElts(Op, TLO)) + return false; + // TODO: For now we assume we know nothing about scalable vectors. if (VT.isScalableVector()) return false; @@ -2565,6 +2810,21 @@ bool TargetLowering::SimplifyDemandedVectorElts( if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known, TLO, Depth + 1)) return true; + + // The bitcast has split each wide element into a number of + // narrow subelements. We have just computed the Known bits + // for wide elements. See if element splitting results in + // some subelements being zero. Only for demanded elements! + for (unsigned SubElt = 0; SubElt != Scale; ++SubElt) { + if (!Known.Zero.extractBits(EltSizeInBits, SubElt * EltSizeInBits) + .isAllOnes()) + continue; + for (unsigned SrcElt = 0; SrcElt != NumSrcElts; ++SrcElt) { + unsigned Elt = Scale * SrcElt + SubElt; + if (DemandedElts[Elt]) + KnownZero.setBit(Elt); + } + } } // If the src element is zero/undef then all the output elements will be - @@ -2646,6 +2906,25 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownUndef.insertBits(SubUndef, i * NumSubElts); KnownZero.insertBits(SubZero, i * NumSubElts); } + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!DemandedElts.isAllOnes()) { + bool FoundNewSub = false; + SmallVector<SDValue, 2> DemandedSubOps; + for (unsigned i = 0; i != NumSubVecs; ++i) { + SDValue SubOp = Op.getOperand(i); + APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts); + SDValue NewSubOp = SimplifyMultipleUseDemandedVectorElts( + SubOp, SubElts, TLO.DAG, Depth + 1); + DemandedSubOps.push_back(NewSubOp ? NewSubOp : SubOp); + FoundNewSub = NewSubOp ? true : FoundNewSub; + } + if (FoundNewSub) { + SDValue NewOp = + TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, DemandedSubOps); + return TLO.CombineTo(Op, NewOp); + } + } break; } case ISD::INSERT_SUBVECTOR: { @@ -2699,7 +2978,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx); APInt SrcUndef, SrcZero; if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, @@ -2858,7 +3137,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( APInt SrcUndef, SrcZero; SDValue Src = Op.getOperand(0); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); - APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts); + APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts); if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -3618,6 +3897,115 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, return SDValue(); } +static SDValue foldSetCCWithRotate(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + if (Cond != ISD::SETEQ && Cond != ISD::SETNE) + return SDValue(); + + auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true); + if (!C1 || !(C1->isZero() || C1->isAllOnes())) + return SDValue(); + + auto getRotateSource = [](SDValue X) { + if (X.getOpcode() == ISD::ROTL || X.getOpcode() == ISD::ROTR) + return X.getOperand(0); + return SDValue(); + }; + + // Peek through a rotated value compared against 0 or -1: + // (rot X, Y) == 0/-1 --> X == 0/-1 + // (rot X, Y) != 0/-1 --> X != 0/-1 + if (SDValue R = getRotateSource(N0)) + return DAG.getSetCC(dl, VT, R, N1, Cond); + + // Peek through an 'or' of a rotated value compared against 0: + // or (rot X, Y), Z ==/!= 0 --> (or X, Z) ==/!= 0 + // or Z, (rot X, Y) ==/!= 0 --> (or X, Z) ==/!= 0 + // + // TODO: Add the 'and' with -1 sibling. + // TODO: Recurse through a series of 'or' ops to find the rotate. + EVT OpVT = N0.getValueType(); + if (N0.hasOneUse() && N0.getOpcode() == ISD::OR && C1->isZero()) { + if (SDValue R = getRotateSource(N0.getOperand(0))) { + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(1)); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + if (SDValue R = getRotateSource(N0.getOperand(1))) { + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, R, N0.getOperand(0)); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + } + + return SDValue(); +} + +static SDValue foldSetCCWithFunnelShift(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + // If we are testing for all-bits-clear, we might be able to do that with + // less shifting since bit-order does not matter. + if (Cond != ISD::SETEQ && Cond != ISD::SETNE) + return SDValue(); + + auto *C1 = isConstOrConstSplat(N1, /* AllowUndefs */ true); + if (!C1 || !C1->isZero()) + return SDValue(); + + if (!N0.hasOneUse() || + (N0.getOpcode() != ISD::FSHL && N0.getOpcode() != ISD::FSHR)) + return SDValue(); + + unsigned BitWidth = N0.getScalarValueSizeInBits(); + auto *ShAmtC = isConstOrConstSplat(N0.getOperand(2)); + if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) + return SDValue(); + + // Canonicalize fshr as fshl to reduce pattern-matching. + unsigned ShAmt = ShAmtC->getZExtValue(); + if (N0.getOpcode() == ISD::FSHR) + ShAmt = BitWidth - ShAmt; + + // Match an 'or' with a specific operand 'Other' in either commuted variant. + SDValue X, Y; + auto matchOr = [&X, &Y](SDValue Or, SDValue Other) { + if (Or.getOpcode() != ISD::OR || !Or.hasOneUse()) + return false; + if (Or.getOperand(0) == Other) { + X = Or.getOperand(0); + Y = Or.getOperand(1); + return true; + } + if (Or.getOperand(1) == Other) { + X = Or.getOperand(1); + Y = Or.getOperand(0); + return true; + } + return false; + }; + + EVT OpVT = N0.getValueType(); + EVT ShAmtVT = N0.getOperand(2).getValueType(); + SDValue F0 = N0.getOperand(0); + SDValue F1 = N0.getOperand(1); + if (matchOr(F0, F1)) { + // fshl (or X, Y), X, C ==/!= 0 --> or (shl Y, C), X ==/!= 0 + SDValue NewShAmt = DAG.getConstant(ShAmt, dl, ShAmtVT); + SDValue Shift = DAG.getNode(ISD::SHL, dl, OpVT, Y, NewShAmt); + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + if (matchOr(F1, F0)) { + // fshl X, (or X, Y), C ==/!= 0 --> or (srl Y, BW-C), X ==/!= 0 + SDValue NewShAmt = DAG.getConstant(BitWidth - ShAmt, dl, ShAmtVT); + SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, Y, NewShAmt); + SDValue NewOr = DAG.getNode(ISD::OR, dl, OpVT, Shift, X); + return DAG.getSetCC(dl, VT, NewOr, N1, Cond); + } + + return SDValue(); +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -3632,13 +4020,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl)) return Fold; + bool N0ConstOrSplat = + isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true); + bool N1ConstOrSplat = + isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true); + // Ensure that the constant occurs on the RHS and fold constant comparisons. // TODO: Handle non-splat vector constants. All undef causes trouble. // FIXME: We can't yet fold constant scalable vector splats, so avoid an // infinite loop here when we encounter one. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); - if (isConstOrConstSplat(N0) && - (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) && + if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -3647,13 +4039,19 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // -- but in reverse order -- then try to commute the operands of this setcc // to match. A matching pair of setcc (cmp) and sub may be combined into 1 // instruction on some targets. - if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && + if (!N0ConstOrSplat && !N1ConstOrSplat && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) && !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1})) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + if (SDValue V = foldSetCCWithRotate(VT, N0, N1, Cond, dl, DAG)) + return V; + + if (SDValue V = foldSetCCWithFunnelShift(VT, N0, N1, Cond, dl, DAG)) + return V; + if (auto *N1C = isConstOrConstSplat(N1)) { const APInt &C1 = N1C->getAPIntValue(); @@ -4399,37 +4797,30 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) { if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { // Turn (X+C1) == C2 --> X == C2-C1 - if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { - return DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(RHSC->getAPIntValue()- - LHSR->getAPIntValue(), - dl, N0.getValueType()), Cond); - } - - // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. - if (N0.getOpcode() == ISD::XOR) - // If we know that all of the inverted bits are zero, don't bother - // performing the inversion. - if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) - return - DAG.getSetCC(dl, VT, N0.getOperand(0), - DAG.getConstant(LHSR->getAPIntValue() ^ - RHSC->getAPIntValue(), - dl, N0.getValueType()), - Cond); + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue() - LHSR->getAPIntValue(), + dl, N0.getValueType()), + Cond); + + // Turn (X^C1) == C2 --> X == C1^C2 + if (N0.getOpcode() == ISD::XOR && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); } // Turn (C1-X) == C2 --> X == C1-C2 - if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { - if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { - return - DAG.getSetCC(dl, VT, N0.getOperand(1), - DAG.getConstant(SUBC->getAPIntValue() - - RHSC->getAPIntValue(), - dl, N0.getValueType()), - Cond); - } - } + if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) + return DAG.getSetCC( + dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); // Could RHSC fold directly into a compare? if (RHSC->getValueType(0).getSizeInBits() <= 64) @@ -4582,13 +4973,14 @@ TargetLowering::getConstraintType(StringRef Constraint) const { case 'o': // offsetable case 'V': // not offsetable return C_Memory; + case 'p': // Address. + return C_Address; case 'n': // Simple Integer case 'E': // Floating Point Constant case 'F': // Floating Point Constant return C_Immediate; case 'i': // Simple Integer or Relocatable Constant case 's': // Relocatable Constant - case 'p': // Address. case 'X': // Allow ANY value. case 'I': // Target registers. case 'J': @@ -4826,8 +5218,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, if (OpInfo.CallOperandVal) { llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); if (OpInfo.isIndirect) { - OpTy = Call.getAttributes().getParamElementType(ArgNo); - assert(OpTy && "Indirect opernad must have elementtype attribute"); + OpTy = Call.getParamElementType(ArgNo); + assert(OpTy && "Indirect operand must have elementtype attribute"); } // Look for vector wrapped in a struct. e.g. { <16 x i8> }. @@ -4962,6 +5354,7 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { case TargetLowering::C_RegisterClass: return 2; case TargetLowering::C_Memory: + case TargetLowering::C_Address: return 3; } llvm_unreachable("Invalid constraint type"); @@ -5232,6 +5625,17 @@ SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, return SDValue(); } +SDValue +TargetLowering::BuildSREMPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + SmallVectorImpl<SDNode *> &Created) const { + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N, 0); // Lower SREM as SREM + return SDValue(); +} + /// Given an ISD::SDIV node expressing a divide by constant, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. @@ -7016,6 +7420,30 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, return true; } +SDValue +TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node, + SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + assert((Opcode == ISD::FMINNUM || Opcode == ISD::FMAXNUM || + Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) && + "Wrong opcode"); + + if (Node->getFlags().hasNoNaNs()) { + ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; + SDValue Op1 = Node->getOperand(0); + SDValue Op2 = Node->getOperand(1); + SDValue SelCC = DAG.getSelectCC(SDLoc(Node), Op1, Op2, Op1, Op2, Pred); + // Copy FMF flags, but always set the no-signed-zeros flag + // as this is implied by the FMINNUM/FMAXNUM semantics. + SDNodeFlags Flags = Node->getFlags(); + Flags.setNoSignedZeros(true); + SelCC->setFlags(Flags); + return SelCC; + } + + return SDValue(); +} + SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); @@ -7058,29 +7486,234 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, } } - // If none of the above worked, but there are no NaNs, then expand to - // a compare/select sequence. This is required for correctness since - // InstCombine might have canonicalized a fcmp+select sequence to a - // FMINNUM/FMAXNUM node. If we were to fall through to the default - // expansion to libcall, we might introduce a link-time dependency - // on libm into a file that originally did not have one. - if (Node->getFlags().hasNoNaNs()) { - ISD::CondCode Pred = - Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT; - SDValue Op1 = Node->getOperand(0); - SDValue Op2 = Node->getOperand(1); - SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred); - // Copy FMF flags, but always set the no-signed-zeros flag - // as this is implied by the FMINNUM/FMAXNUM semantics. - SDNodeFlags Flags = Node->getFlags(); - Flags.setNoSignedZeros(true); - SelCC->setFlags(Flags); + if (SDValue SelCC = createSelectForFMINNUM_FMAXNUM(Node, DAG)) return SelCC; - } return SDValue(); } +SDValue TargetLowering::expandIS_FPCLASS(EVT ResultVT, SDValue Op, + unsigned Test, SDNodeFlags Flags, + const SDLoc &DL, + SelectionDAG &DAG) const { + EVT OperandVT = Op.getValueType(); + assert(OperandVT.isFloatingPoint()); + + // Degenerated cases. + if (Test == 0) + return DAG.getBoolConstant(false, DL, ResultVT, OperandVT); + if ((Test & fcAllFlags) == fcAllFlags) + return DAG.getBoolConstant(true, DL, ResultVT, OperandVT); + + // PPC double double is a pair of doubles, of which the higher part determines + // the value class. + if (OperandVT == MVT::ppcf128) { + Op = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::f64, Op, + DAG.getConstant(1, DL, MVT::i32)); + OperandVT = MVT::f64; + } + + // Some checks may be represented as inversion of simpler check, for example + // "inf|normal|subnormal|zero" => !"nan". + bool IsInverted = false; + if (unsigned InvertedCheck = getInvertedFPClassTest(Test)) { + IsInverted = true; + Test = InvertedCheck; + } + + // Floating-point type properties. + EVT ScalarFloatVT = OperandVT.getScalarType(); + const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext()); + const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics(); + bool IsF80 = (ScalarFloatVT == MVT::f80); + + // Some checks can be implemented using float comparisons, if floating point + // exceptions are ignored. + if (Flags.hasNoFPExcept() && + isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) { + if (Test == fcZero) + return DAG.getSetCC(DL, ResultVT, Op, + DAG.getConstantFP(0.0, DL, OperandVT), + IsInverted ? ISD::SETUNE : ISD::SETOEQ); + if (Test == fcNan) + return DAG.getSetCC(DL, ResultVT, Op, Op, + IsInverted ? ISD::SETO : ISD::SETUO); + } + + // In the general case use integer operations. + unsigned BitSize = OperandVT.getScalarSizeInBits(); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), BitSize); + if (OperandVT.isVector()) + IntVT = EVT::getVectorVT(*DAG.getContext(), IntVT, + OperandVT.getVectorElementCount()); + SDValue OpAsInt = DAG.getBitcast(IntVT, Op); + + // Various masks. + APInt SignBit = APInt::getSignMask(BitSize); + APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign. + APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit. + const unsigned ExplicitIntBitInF80 = 63; + APInt ExpMask = Inf; + if (IsF80) + ExpMask.clearBit(ExplicitIntBitInF80); + APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf; + APInt QNaNBitMask = + APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1); + APInt InvertionMask = APInt::getAllOnesValue(ResultVT.getScalarSizeInBits()); + + SDValue ValueMaskV = DAG.getConstant(ValueMask, DL, IntVT); + SDValue SignBitV = DAG.getConstant(SignBit, DL, IntVT); + SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT); + SDValue ZeroV = DAG.getConstant(0, DL, IntVT); + SDValue InfV = DAG.getConstant(Inf, DL, IntVT); + SDValue ResultInvertionMask = DAG.getConstant(InvertionMask, DL, ResultVT); + + SDValue Res; + const auto appendResult = [&](SDValue PartialRes) { + if (PartialRes) { + if (Res) + Res = DAG.getNode(ISD::OR, DL, ResultVT, Res, PartialRes); + else + Res = PartialRes; + } + }; + + SDValue IntBitIsSetV; // Explicit integer bit in f80 mantissa is set. + const auto getIntBitIsSet = [&]() -> SDValue { + if (!IntBitIsSetV) { + APInt IntBitMask(BitSize, 0); + IntBitMask.setBit(ExplicitIntBitInF80); + SDValue IntBitMaskV = DAG.getConstant(IntBitMask, DL, IntVT); + SDValue IntBitV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, IntBitMaskV); + IntBitIsSetV = DAG.getSetCC(DL, ResultVT, IntBitV, ZeroV, ISD::SETNE); + } + return IntBitIsSetV; + }; + + // Split the value into sign bit and absolute value. + SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, OpAsInt, ValueMaskV); + SDValue SignV = DAG.getSetCC(DL, ResultVT, OpAsInt, + DAG.getConstant(0.0, DL, IntVT), ISD::SETLT); + + // Tests that involve more than one class should be processed first. + SDValue PartialRes; + + if (IsF80) + ; // Detect finite numbers of f80 by checking individual classes because + // they have different settings of the explicit integer bit. + else if ((Test & fcFinite) == fcFinite) { + // finite(V) ==> abs(V) < exp_mask + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT); + Test &= ~fcFinite; + } else if ((Test & fcFinite) == fcPosFinite) { + // finite(V) && V > 0 ==> V < exp_mask + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ExpMaskV, ISD::SETULT); + Test &= ~fcPosFinite; + } else if ((Test & fcFinite) == fcNegFinite) { + // finite(V) && V < 0 ==> abs(V) < exp_mask && signbit == 1 + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ExpMaskV, ISD::SETLT); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + Test &= ~fcNegFinite; + } + appendResult(PartialRes); + + // Check for individual classes. + + if (unsigned PartialCheck = Test & fcZero) { + if (PartialCheck == fcPosZero) + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, ZeroV, ISD::SETEQ); + else if (PartialCheck == fcZero) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, ZeroV, ISD::SETEQ); + else // ISD::fcNegZero + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, SignBitV, ISD::SETEQ); + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcInf) { + if (PartialCheck == fcPosInf) + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, InfV, ISD::SETEQ); + else if (PartialCheck == fcInf) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETEQ); + else { // ISD::fcNegInf + APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt(); + SDValue NegInfV = DAG.getConstant(NegInf, DL, IntVT); + PartialRes = DAG.getSetCC(DL, ResultVT, OpAsInt, NegInfV, ISD::SETEQ); + } + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcNan) { + APInt InfWithQnanBit = Inf | QNaNBitMask; + SDValue InfWithQnanBitV = DAG.getConstant(InfWithQnanBit, DL, IntVT); + if (PartialCheck == fcNan) { + // isnan(V) ==> abs(V) > int(inf) + PartialRes = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT); + if (IsF80) { + // Recognize unsupported values as NaNs for compatibility with glibc. + // In them (exp(V)==0) == int_bit. + SDValue ExpBits = DAG.getNode(ISD::AND, DL, IntVT, AbsV, ExpMaskV); + SDValue ExpIsZero = + DAG.getSetCC(DL, ResultVT, ExpBits, ZeroV, ISD::SETEQ); + SDValue IsPseudo = + DAG.getSetCC(DL, ResultVT, getIntBitIsSet(), ExpIsZero, ISD::SETEQ); + PartialRes = DAG.getNode(ISD::OR, DL, ResultVT, PartialRes, IsPseudo); + } + } else if (PartialCheck == fcQNan) { + // isquiet(V) ==> abs(V) >= (unsigned(Inf) | quiet_bit) + PartialRes = + DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETGE); + } else { // ISD::fcSNan + // issignaling(V) ==> abs(V) > unsigned(Inf) && + // abs(V) < (unsigned(Inf) | quiet_bit) + SDValue IsNan = DAG.getSetCC(DL, ResultVT, AbsV, InfV, ISD::SETGT); + SDValue IsNotQnan = + DAG.getSetCC(DL, ResultVT, AbsV, InfWithQnanBitV, ISD::SETLT); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, IsNan, IsNotQnan); + } + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcSubnormal) { + // issubnormal(V) ==> unsigned(abs(V) - 1) < (all mantissa bits set) + // issubnormal(V) && V>0 ==> unsigned(V - 1) < (all mantissa bits set) + SDValue V = (PartialCheck == fcPosSubnormal) ? OpAsInt : AbsV; + SDValue MantissaV = DAG.getConstant(AllOneMantissa, DL, IntVT); + SDValue VMinusOneV = + DAG.getNode(ISD::SUB, DL, IntVT, V, DAG.getConstant(1, DL, IntVT)); + PartialRes = DAG.getSetCC(DL, ResultVT, VMinusOneV, MantissaV, ISD::SETULT); + if (PartialCheck == fcNegSubnormal) + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + appendResult(PartialRes); + } + + if (unsigned PartialCheck = Test & fcNormal) { + // isnormal(V) ==> (0 < exp < max_exp) ==> (unsigned(exp-1) < (max_exp-1)) + APInt ExpLSB = ExpMask & ~(ExpMask.shl(1)); + SDValue ExpLSBV = DAG.getConstant(ExpLSB, DL, IntVT); + SDValue ExpMinus1 = DAG.getNode(ISD::SUB, DL, IntVT, AbsV, ExpLSBV); + APInt ExpLimit = ExpMask - ExpLSB; + SDValue ExpLimitV = DAG.getConstant(ExpLimit, DL, IntVT); + PartialRes = DAG.getSetCC(DL, ResultVT, ExpMinus1, ExpLimitV, ISD::SETULT); + if (PartialCheck == fcNegNormal) + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, SignV); + else if (PartialCheck == fcPosNormal) { + SDValue PosSignV = + DAG.getNode(ISD::XOR, DL, ResultVT, SignV, ResultInvertionMask); + PartialRes = DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, PosSignV); + } + if (IsF80) + PartialRes = + DAG.getNode(ISD::AND, DL, ResultVT, PartialRes, getIntBitIsSet()); + appendResult(PartialRes); + } + + if (!Res) + return DAG.getConstant(IsInverted, DL, ResultVT); + if (IsInverted) + Res = DAG.getNode(ISD::XOR, DL, ResultVT, Res, ResultInvertionMask); + return Res; +} + // Only expand vector types if we have the appropriate vector bit operations. static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) { assert(VT.isVector() && "Expected vector type"); @@ -7116,8 +7749,6 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); - SDValue Mask01 = - DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, @@ -7137,13 +7768,28 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(4, dl, ShVT))), Mask0F); - // v = (v * 0x01010101...) >> (Len - 8) - if (Len > 8) - Op = - DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), - DAG.getConstant(Len - 8, dl, ShVT)); - return Op; + if (Len <= 8) + return Op; + + // Avoid the multiply if we only have 2 bytes to add. + // TODO: Only doing this for scalars because vectors weren't as obviously + // improved. + if (Len == 16 && !VT.isVector()) { + // v = (v + (v >> 8)) & 0x00FF; + return DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(8, dl, ShVT))), + DAG.getConstant(0xFF, dl, VT)); + } + + // v = (v * 0x01010101...) >> (Len - 8) + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + return DAG.getNode(ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, dl, ShVT)); } SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { @@ -7265,6 +7911,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); + Op = DAG.getFreeze(Op); return DAG.getNode(ISD::UMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } @@ -7272,6 +7919,7 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { + Op = DAG.getFreeze(Op); SDValue Zero = DAG.getConstant(0, dl, VT); return DAG.getNode(ISD::SMIN, dl, VT, Op, DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); @@ -7285,16 +7933,17 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return SDValue(); + Op = DAG.getFreeze(Op); SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); - if (!IsNegative) { - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - return DAG.getNode(ISD::XOR, dl, VT, Add, Shift); - } + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + + // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y) + if (!IsNegative) + return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift); // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) - SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); } @@ -8041,23 +8690,6 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, return SDValue(); } -// Convert redundant addressing modes (e.g. scaling is redundant -// when accessing bytes). -ISD::MemIndexType -TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT, - SDValue Offsets) const { - bool IsScaledIndex = - (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED); - bool IsSignedIndex = - (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED); - - // Scaling is unimportant for bytes, canonicalize to unscaled. - if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) - return IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; - - return IndexType; -} - SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); @@ -8473,8 +9105,20 @@ void TargetLowering::expandUADDSUBO( EVT ResultType = Node->getValueType(1); EVT SetCCType = getSetCCResultType( DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0)); - ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; - SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC); + SDValue SetCC; + if (IsAdd && isOneConstant(RHS)) { + // Special case: uaddo X, 1 overflowed if X+1 is 0. This potential reduces + // the live range of X. We assume comparing with 0 is cheap. + // The general case (X + C) < C is not necessarily beneficial. Although we + // reduce the live range of X, we may introduce the materialization of + // constant C. + SetCC = + DAG.getSetCC(dl, SetCCType, Result, + DAG.getConstant(0, dl, Node->getValueType(0)), ISD::SETEQ); + } else { + ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT; + SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC); + } Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); } @@ -8773,11 +9417,11 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, // floating-point values. APInt MinInt, MaxInt; if (IsSigned) { - MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth); - MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth); + MinInt = APInt::getSignedMinValue(SatWidth).sext(DstWidth); + MaxInt = APInt::getSignedMaxValue(SatWidth).sext(DstWidth); } else { - MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth); - MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth); + MinInt = APInt::getMinValue(SatWidth).zext(DstWidth); + MaxInt = APInt::getMaxValue(SatWidth).zext(DstWidth); } // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as @@ -8931,13 +9575,16 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, - SDValue &CC, bool &NeedInvert, + SDValue &CC, SDValue Mask, + SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); NeedInvert = false; + assert(!EVL == !Mask && "VP Mask and EVL must either both be set or unset"); + bool IsNonVP = !EVL; switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); @@ -9044,17 +9691,34 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { // If we aren't the ordered or unorder operation, // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). - SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + if (IsNonVP) { + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling); + } else { + SetCC1 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC1, Mask, EVL); + SetCC2 = DAG.getSetCCVP(dl, VT, LHS, RHS, CC2, Mask, EVL); + } } else { // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) - SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); - SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + if (IsNonVP) { + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling); + } else { + SetCC1 = DAG.getSetCCVP(dl, VT, LHS, LHS, CC1, Mask, EVL); + SetCC2 = DAG.getSetCCVP(dl, VT, RHS, RHS, CC2, Mask, EVL); + } } if (Chain) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1), SetCC2.getValue(1)); - LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + if (IsNonVP) + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + else { + // Transform the binary opcode to the VP equivalent. + assert((Opc == ISD::OR || Opc == ISD::AND) && "Unexpected opcode"); + Opc = Opc == ISD::OR ? ISD::VP_OR : ISD::VP_AND; + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2, Mask, EVL); + } RHS = SDValue(); CC = SDValue(); return true; |