diff options
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 102 |
1 files changed, 69 insertions, 33 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index e4a69adff05b..737695b5eabe 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -645,6 +645,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( if (DemandedBits == 0 || DemandedElts == 0) return DAG.getUNDEF(Op.getValueType()); + bool IsLE = DAG.getDataLayout().isLittleEndian(); unsigned NumElts = DemandedElts.getBitWidth(); unsigned BitWidth = DemandedBits.getBitWidth(); KnownBits LHSKnown, RHSKnown; @@ -663,16 +664,15 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( Src, DemandedBits, DemandedElts, DAG, Depth + 1)) return DAG.getBitcast(DstVT, V); - // TODO - bigendian once we have test coverage. - if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 && - DAG.getDataLayout().isLittleEndian()) { + if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0) { unsigned Scale = NumDstEltBits / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + unsigned EltOffset = IsLE ? i : (Scale - 1 - i); + unsigned BitOffset = EltOffset * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset); if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) @@ -687,8 +687,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( } // TODO - bigendian once we have test coverage. - if ((NumSrcEltBits % NumDstEltBits) == 0 && - DAG.getDataLayout().isLittleEndian()) { + if (IsLE && (NumSrcEltBits % NumDstEltBits) == 0) { unsigned Scale = NumSrcEltBits / NumDstEltBits; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); @@ -802,8 +801,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); EVT DstVT = Op.getValueType(); - if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() && - DAG.getDataLayout().isLittleEndian() && + if (IsLE && DemandedElts == 1 && + DstVT.getSizeInBits() == SrcVT.getSizeInBits() && DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) { return DAG.getBitcast(DstVT, Src); } @@ -913,6 +912,7 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getValueType().isScalableVector()) return false; + bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); unsigned NumElts = OriginalDemandedElts.getBitWidth(); assert((!Op.getValueType().isVector() || NumElts == Op.getValueType().getVectorNumElements()) && @@ -1725,11 +1725,40 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::ROTR: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); + bool IsROTL = (Op.getOpcode() == ISD::ROTL); // If we're rotating an 0/-1 value, then it stays an 0/-1 value. if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1)) return TLO.CombineTo(Op, Op0); + if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) { + unsigned Amt = SA->getAPIntValue().urem(BitWidth); + unsigned RevAmt = BitWidth - Amt; + + // rotl: (Op0 << Amt) | (Op0 >> (BW - Amt)) + // rotr: (Op0 << (BW - Amt)) | (Op0 >> Amt) + APInt Demanded0 = DemandedBits.rotr(IsROTL ? Amt : RevAmt); + if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO, + Depth + 1)) + return true; + + // rot*(x, 0) --> x + if (Amt == 0) + return TLO.CombineTo(Op, Op0); + + // See if we don't demand either half of the rotated bits. + if ((!TLO.LegalOperations() || isOperationLegal(ISD::SHL, VT)) && + DemandedBits.countTrailingZeros() >= (IsROTL ? Amt : RevAmt)) { + Op1 = TLO.DAG.getConstant(IsROTL ? Amt : RevAmt, dl, Op1.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, Op1)); + } + if ((!TLO.LegalOperations() || isOperationLegal(ISD::SRL, VT)) && + DemandedBits.countLeadingZeros() >= (IsROTL ? RevAmt : Amt)) { + Op1 = TLO.DAG.getConstant(IsROTL ? RevAmt : Amt, dl, Op1.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); + } + } + // For pow-2 bitwidths we only demand the bottom modulo amt bits. if (isPowerOf2_32(BitWidth)) { APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1); @@ -1887,9 +1916,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.getActiveBits() <= InBits) { // If we only need the non-extended bits of the bottom element // then we can just bitcast to the result. - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); unsigned Opc = @@ -1925,9 +1953,8 @@ bool TargetLowering::SimplifyDemandedBits( if (DemandedBits.getActiveBits() <= InBits) { // If we only need the non-extended bits of the bottom element // then we can just bitcast to the result. - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); unsigned Opc = @@ -1976,9 +2003,8 @@ bool TargetLowering::SimplifyDemandedBits( // If we only need the bottom element then we can just bitcast. // TODO: Handle ANY_EXTEND? - if (IsVecInReg && DemandedElts == 1 && - VT.getSizeInBits() == SrcVT.getSizeInBits() && - TLO.DAG.getDataLayout().isLittleEndian()) + if (IsLE && IsVecInReg && DemandedElts == 1 && + VT.getSizeInBits() == SrcVT.getSizeInBits()) return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); APInt InDemandedBits = DemandedBits.trunc(InBits); @@ -2140,16 +2166,15 @@ bool TargetLowering::SimplifyDemandedBits( // Bitcast from a vector using SimplifyDemanded Bits/VectorElts. // Demand the elt/bit if any of the original elts/bits are demanded. - // TODO - bigendian once we have test coverage. - if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 && - TLO.DAG.getDataLayout().isLittleEndian()) { + if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { - unsigned Offset = i * NumSrcEltBits; - APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); + unsigned EltOffset = IsLE ? i : (Scale - 1 - i); + unsigned BitOffset = EltOffset * NumSrcEltBits; + APInt Sub = DemandedBits.extractBits(NumSrcEltBits, BitOffset); if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) @@ -2167,8 +2192,8 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, KnownSrcBits, TLO, Depth + 1)) return true; - } else if ((NumSrcEltBits % BitWidth) == 0 && - TLO.DAG.getDataLayout().isLittleEndian()) { + } else if (IsLE && (NumSrcEltBits % BitWidth) == 0) { + // TODO - bigendian once we have test coverage. unsigned Scale = NumSrcEltBits / BitWidth; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); @@ -2409,6 +2434,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( SDLoc DL(Op); unsigned EltSizeInBits = VT.getScalarSizeInBits(); + bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); // Helper for demanding the specified elements and all the bits of both binary // operands. @@ -2484,7 +2510,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Try calling SimplifyDemandedBits, converting demanded elts to the bits // of the large element. // TODO - bigendian once we have test coverage. - if (TLO.DAG.getDataLayout().isLittleEndian()) { + if (IsLE) { unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits); for (unsigned i = 0; i != NumElts; ++i) @@ -2797,9 +2823,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero = SrcZero.zextOrTrunc(NumElts); KnownUndef = SrcUndef.zextOrTrunc(NumElts); - if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && + if (IsLE && Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG && Op.getValueSizeInBits() == Src.getValueSizeInBits() && - DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) { + DemandedSrcElts == 1) { // aext - if we just need the bottom element then we can bitcast. return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src)); } @@ -2812,8 +2838,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( // zext - if we just need the bottom element then we can mask: // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. - if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() && - Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) && + if (IsLE && DemandedSrcElts == 1 && Src.getOpcode() == ISD::AND && + Op->isOnlyUserOf(Src.getNode()) && Op.getValueSizeInBits() == Src.getValueSizeInBits()) { SDLoc DL(Op); EVT SrcVT = Src.getValueType(); @@ -2834,9 +2860,19 @@ bool TargetLowering::SimplifyDemandedVectorElts( // TODO: There are more binop opcodes that could be handled here - MIN, // MAX, saturated math, etc. + case ISD::ADD: { + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + if (Op0 == Op1 && Op->isOnlyUserOf(Op0.getNode())) { + APInt UndefLHS, ZeroLHS; + if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO, + Depth + 1, /*AssumeSingleUse*/ true)) + return true; + } + LLVM_FALLTHROUGH; + } case ISD::OR: case ISD::XOR: - case ISD::ADD: case ISD::SUB: case ISD::FADD: case ISD::FSUB: @@ -5586,7 +5622,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); assert(!P.isZero() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); + assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // Q = floor((2^W - 1) u/ D) // R = ((2^W - 1) u% D) @@ -5832,7 +5868,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); assert(!P.isZero() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); + assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // A = floor((2^(W - 1) - 1) / D0) & -2^K APInt A = APInt::getSignedMaxValue(W).udiv(D0); |