diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1042 |
1 files changed, 778 insertions, 264 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8b3e6189a07f..5760132e44a0 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -93,7 +93,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI, SDValue Value = OutVals[I]; if (Value->getOpcode() != ISD::CopyFromReg) return false; - MCRegister ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); + Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg(); if (MRI.getLiveInPhysReg(ArgReg) != Reg) return false; } @@ -250,7 +250,7 @@ bool TargetLowering::findOptimalMemOpLowering( bool Fast; if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && allowsMisalignedMemoryAccesses( - VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0, + VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 1, MachineMemOperand::MONone, &Fast) && Fast) VTSize = Size; @@ -912,8 +912,14 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.getOpcode() == ISD::Constant) { // We know all of the bits for a constant! - Known.One = cast<ConstantSDNode>(Op)->getAPIntValue(); - Known.Zero = ~Known.One; + Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue()); + return false; + } + + if (Op.getOpcode() == ISD::ConstantFP) { + // We know all of the bits for a floating point constant! + Known = KnownBits::makeConstant( + cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()); return false; } @@ -1009,10 +1015,8 @@ bool TargetLowering::SimplifyDemandedBits( Depth + 1)) return true; - if (!!DemandedVecElts) { - Known.One &= KnownVec.One; - Known.Zero &= KnownVec.Zero; - } + if (!!DemandedVecElts) + Known = KnownBits::commonBits(Known, KnownVec); return false; } @@ -1037,14 +1041,10 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero.setAllBits(); Known.One.setAllBits(); - if (!!DemandedSubElts) { - Known.One &= KnownSub.One; - Known.Zero &= KnownSub.Zero; - } - if (!!DemandedSrcElts) { - Known.One &= KnownSrc.One; - Known.Zero &= KnownSrc.Zero; - } + if (!!DemandedSubElts) + Known = KnownBits::commonBits(Known, KnownSub); + if (!!DemandedSrcElts) + Known = KnownBits::commonBits(Known, KnownSrc); // Attempt to avoid multi-use src if we don't need anything from it. if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || @@ -1101,10 +1101,8 @@ bool TargetLowering::SimplifyDemandedBits( Known2, TLO, Depth + 1)) return true; // Known bits are shared by every demanded subvector element. - if (!!DemandedSubElts) { - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; - } + if (!!DemandedSubElts) + Known = KnownBits::commonBits(Known, Known2); } break; } @@ -1142,15 +1140,13 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO, Depth + 1)) return true; - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } if (!!DemandedRHS) { if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO, Depth + 1)) return true; - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); } // Attempt to avoid multi-use ops if we don't need anything from them. @@ -1325,15 +1321,15 @@ bool TargetLowering::SimplifyDemandedBits( return true; // If all of the unknown bits are known to be zero on one side or the other - // (but not both) turn this into an *inclusive* or. + // turn this into an *inclusive* or. // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero)) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1)); ConstantSDNode* C = isConstOrConstSplat(Op1, DemandedElts); if (C) { - // If one side is a constant, and all of the known set bits on the other - // side are also set in the constant, turn this into an AND, as we know + // If one side is a constant, and all of the set bits in the constant are + // also known set on the other side, turn this into an AND, as we know // the bits will be cleared. // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 // NB: it is okay if more bits are known than are requested @@ -1377,8 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Only known if known in both the LHS and RHS. - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); break; case ISD::SELECT_CC: if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO, @@ -1395,8 +1390,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Only known if known in both the LHS and RHS. - Known.One &= Known2.One; - Known.Zero &= Known2.Zero; + Known = KnownBits::commonBits(Known, Known2); break; case ISD::SETCC: { SDValue Op0 = Op.getOperand(0); @@ -1728,6 +1722,32 @@ bool TargetLowering::SimplifyDemandedBits( } break; } + case ISD::UMIN: { + // Check if one arg is always less than (or equal) to the other arg. + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); + KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); + Known = KnownBits::umin(Known0, Known1); + if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1)) + return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1); + if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1)) + return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1); + break; + } + case ISD::UMAX: { + // Check if one arg is always greater than (or equal) to the other arg. + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); + KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); + Known = KnownBits::umax(Known0, Known1); + if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) + return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1); + if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) + return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1); + break; + } case ISD::BITREVERSE: { SDValue Src = Op.getOperand(0); APInt DemandedSrcBits = DemandedBits.reverseBits(); @@ -1748,6 +1768,17 @@ bool TargetLowering::SimplifyDemandedBits( Known.Zero = Known2.Zero.byteSwap(); break; } + case ISD::CTPOP: { + // If only 1 bit is demanded, replace with PARITY as long as we're before + // op legalization. + // FIXME: Limit to scalars for now. + if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector()) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, + Op.getOperand(0))); + + Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + break; + } case ISD::SIGN_EXTEND_INREG: { SDValue Op0 = Op.getOperand(0); EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); @@ -1858,6 +1889,11 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known.hasConflict() && "Bits known to be one AND zero?"); assert(Known.getBitWidth() == InBits && "Src width has changed?"); Known = Known.zext(BitWidth); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } case ISD::SIGN_EXTEND: @@ -1906,6 +1942,11 @@ bool TargetLowering::SimplifyDemandedBits( if (!TLO.LegalOperations() || isOperationLegal(Opc, VT)) return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src)); } + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( + Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1)) + return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } case ISD::ANY_EXTEND: @@ -1945,7 +1986,8 @@ bool TargetLowering::SimplifyDemandedBits( // zero/one bits live out. unsigned OperandBitWidth = Src.getScalarValueSizeInBits(); APInt TruncMask = DemandedBits.zext(OperandBitWidth); - if (SimplifyDemandedBits(Src, TruncMask, Known, TLO, Depth + 1)) + if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO, + Depth + 1)) return true; Known = Known.trunc(BitWidth); @@ -1968,9 +2010,9 @@ bool TargetLowering::SimplifyDemandedBits( // undesirable. break; - SDValue ShAmt = Src.getOperand(1); - auto *ShAmtC = dyn_cast<ConstantSDNode>(ShAmt); - if (!ShAmtC || ShAmtC->getAPIntValue().uge(BitWidth)) + const APInt *ShAmtC = + TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts); + if (!ShAmtC) break; uint64_t ShVal = ShAmtC->getZExtValue(); @@ -1982,12 +2024,12 @@ bool TargetLowering::SimplifyDemandedBits( if (!(HighBits & DemandedBits)) { // None of the shifted in bits are needed. Add a truncate of the // shift input, then shift it. - if (TLO.LegalTypes()) - ShAmt = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL)); + SDValue NewShAmt = TLO.DAG.getConstant( + ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes())); SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0)); return TLO.CombineTo( - Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, ShAmt)); + Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt)); } break; } @@ -2012,10 +2054,14 @@ bool TargetLowering::SimplifyDemandedBits( case ISD::EXTRACT_VECTOR_ELT: { SDValue Src = Op.getOperand(0); SDValue Idx = Op.getOperand(1); - unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); + ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount(); unsigned EltBitWidth = Src.getScalarValueSizeInBits(); + if (SrcEltCnt.isScalable()) + return false; + // Demand the bits from every vector element without a constant index. + unsigned NumSrcElts = SrcEltCnt.getFixedValue(); APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) if (CIdx->getAPIntValue().ult(NumSrcElts)) @@ -2229,9 +2275,13 @@ bool TargetLowering::SimplifyDemandedBits( if (C->isOpaque()) return false; } - // TODO: Handle float bits as well. if (VT.isInteger()) return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT)); + if (VT.isFloatingPoint()) + return TLO.CombineTo( + Op, + TLO.DAG.getConstantFP( + APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT)); } return false; @@ -2593,13 +2643,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero, TLO, Depth + 1)) return true; - KnownUndef.clearBit(Idx); - if (Scl.isUndef()) - KnownUndef.setBit(Idx); + KnownUndef.setBitVal(Idx, Scl.isUndef()); - KnownZero.clearBit(Idx); - if (isNullConstant(Scl) || isNullFPConstant(Scl)) - KnownZero.setBit(Idx); + KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl)); break; } @@ -3347,6 +3393,74 @@ SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(DL, VT, X, YShl1, Cond); } +static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, + SDValue N0, const APInt &C1, + ISD::CondCode Cond, const SDLoc &dl, + SelectionDAG &DAG) { + // Look through truncs that don't change the value of a ctpop. + // FIXME: Add vector support? Need to be careful with setcc result type below. + SDValue CTPOP = N0; + if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() && + N0.getScalarValueSizeInBits() > Log2_32(N0.getOperand(0).getScalarValueSizeInBits())) + CTPOP = N0.getOperand(0); + + if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse()) + return SDValue(); + + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // If this is a vector CTPOP, keep the CTPOP if it is legal. + // TODO: Should we check if CTPOP is legal(or custom) for scalars? + if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) + return SDValue(); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if (Cond == ISD::SETULT || Cond == ISD::SETUGT) { + unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond); + if (C1.ugt(CostLimit + (Cond == ISD::SETULT))) + return SDValue(); + if (C1 == 0 && (Cond == ISD::SETULT)) + return SDValue(); // This is handled elsewhere. + + unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT); + + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + SDValue Result = CTOp; + for (unsigned i = 0; i < Passes; i++) { + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne); + Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add); + } + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC); + } + + // If ctpop is not supported, expand a power-of-2 comparison based on it. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) { + // For scalars, keep CTPOP if it is legal or custom. + if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT)) + return SDValue(); + // This is based on X86's custom lowering for CTPOP which produces more + // instructions than the expansion here. + + // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) + // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) + SDValue Zero = DAG.getConstant(0, dl, CTVT); + SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); + assert(CTVT.isInteger()); + ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); + SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); + SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); + SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); + unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR; + return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); + } + + return SDValue(); +} + /// Try to simplify a setcc built with the specified operands and cc. If it is /// unable to simplify it, return a null SDValue. SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, @@ -3363,8 +3477,11 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Ensure that the constant occurs on the RHS and fold constant comparisons. // TODO: Handle non-splat vector constants. All undef causes trouble. + // FIXME: We can't yet fold constant scalable vector splats, so avoid an + // infinite loop here when we encounter one. ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); if (isConstOrConstSplat(N0) && + (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); @@ -3376,75 +3493,46 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) && (DCI.isBeforeLegalizeOps() || isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) && - DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N1, N0 } ) && - !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), { N0, N1 } )) + DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) && + !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1})) return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); - if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + if (auto *N1C = isConstOrConstSplat(N1)) { const APInt &C1 = N1C->getAPIntValue(); + // Optimize some CTPOP cases. + if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG)) + return V; + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && - N0.getOperand(1).getOpcode() == ISD::Constant) { - const APInt &ShAmt = N0.getConstantOperandAPInt(1); - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - ShAmt == Log2_32(N0.getValueSizeInBits())) { - if ((C1 == 0) == (Cond == ISD::SETEQ)) { - // (srl (ctlz x), 5) == 0 -> X != 0 - // (srl (ctlz x), 5) != 1 -> X != 0 - Cond = ISD::SETNE; - } else { - // (srl (ctlz x), 5) != 0 -> X == 0 - // (srl (ctlz x), 5) == 1 -> X == 0 - Cond = ISD::SETEQ; + isPowerOf2_32(N0.getScalarValueSizeInBits())) { + if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) { + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero, + Cond); } - SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); - return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), - Zero, Cond); } } + } - SDValue CTPOP = N0; - // Look through truncs that don't change the value of a ctpop. - if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) - CTPOP = N0.getOperand(0); - - if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && - (N0 == CTPOP || - N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) { - EVT CTVT = CTPOP.getValueType(); - SDValue CTOp = CTPOP.getOperand(0); - - // (ctpop x) u< 2 -> (x & x-1) == 0 - // (ctpop x) u> 1 -> (x & x-1) != 0 - if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ - SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); - ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; - return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); - } - - // If ctpop is not supported, expand a power-of-2 comparison based on it. - if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) - // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) - SDValue Zero = DAG.getConstant(0, dl, CTVT); - SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); - assert(CTVT.isInteger()); - ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT); - SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne); - SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add); - SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond); - SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond); - unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR; - return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS); - } - } + // FIXME: Support vectors. + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); // (zext x) == C --> x == (trunc C) // (sext x) == C --> x == (trunc C) @@ -3578,11 +3666,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) { SDValue Ptr = Lod->getBasePtr(); if (bestOffset != 0) - Ptr = DAG.getMemBasePlusOffset(Ptr, bestOffset, dl); - unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); - SDValue NewLoad = DAG.getLoad( - newVT, dl, Lod->getChain(), Ptr, - Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign); + Ptr = + DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl); + SDValue NewLoad = + DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), + Lod->getOriginalAlign()); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), @@ -3647,7 +3736,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, break; // todo, be more careful with signed comparisons } } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + (Cond == ISD::SETEQ || Cond == ISD::SETNE) && + !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(), + OpVT)) { EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); EVT ExtDstTy = N0.getValueType(); @@ -3656,26 +3747,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the constant doesn't fit into the number of bits for the source of // the sign extension, it is impossible for both sides to be equal. if (C1.getMinSignedBits() > ExtSrcTyBits) - return DAG.getConstant(Cond == ISD::SETNE, dl, VT); + return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT); - SDValue ZextOp; - EVT Op0Ty = N0.getOperand(0).getValueType(); - if (Op0Ty == ExtSrcTy) { - ZextOp = N0.getOperand(0); - } else { - APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); - ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), - DAG.getConstant(Imm, dl, Op0Ty)); - } + assert(ExtDstTy == N0.getOperand(0).getValueType() && + ExtDstTy != ExtSrcTy && "Unexpected types!"); + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0), + DAG.getConstant(Imm, dl, ExtDstTy)); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(ZextOp.getNode()); // Otherwise, make this a use of a zext. return DAG.getSetCC(dl, VT, ZextOp, - DAG.getConstant(C1 & APInt::getLowBitsSet( - ExtDstTyBits, - ExtSrcTyBits), - dl, ExtDstTy), - Cond); + DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond); } else if ((N1C->isNullValue() || N1C->isOne()) && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC @@ -3699,8 +3782,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::XOR && N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && - isa<ConstantSDNode>(N0.getOperand(1)) && - cast<ConstantSDNode>(N0.getOperand(1))->isOne()) { + isOneConstant(N0.getOperand(1))) { // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We // can only do this if the top bits are known zero. unsigned BitWidth = N0.getValueSizeInBits(); @@ -3744,9 +3826,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond); } } - if (Op0.getOpcode() == ISD::AND && - isa<ConstantSDNode>(Op0.getOperand(1)) && - cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) { + if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, @@ -3884,6 +3964,67 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( VT, N0, N1, Cond, DCI, dl)) return CC; + + // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y). + // For example, when high 32-bits of i64 X are known clear: + // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 + // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 + bool CmpZero = N1C->getAPIntValue().isNullValue(); + bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue(); + if ((CmpZero || CmpNegOne) && N0.hasOneUse()) { + // Match or(lo,shl(hi,bw/2)) pattern. + auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) { + unsigned EltBits = V.getScalarValueSizeInBits(); + if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0) + return false; + SDValue LHS = V.getOperand(0); + SDValue RHS = V.getOperand(1); + APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2); + // Unshifted element must have zero upperbits. + if (RHS.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(RHS.getOperand(1)) && + RHS.getConstantOperandAPInt(1) == (EltBits / 2) && + DAG.MaskedValueIsZero(LHS, HiBits)) { + Lo = LHS; + Hi = RHS.getOperand(0); + return true; + } + if (LHS.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(LHS.getOperand(1)) && + LHS.getConstantOperandAPInt(1) == (EltBits / 2) && + DAG.MaskedValueIsZero(RHS, HiBits)) { + Lo = RHS; + Hi = LHS.getOperand(0); + return true; + } + return false; + }; + + auto MergeConcat = [&](SDValue Lo, SDValue Hi) { + unsigned EltBits = N0.getScalarValueSizeInBits(); + unsigned HalfBits = EltBits / 2; + APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits); + SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT); + SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits); + SDValue NewN0 = + DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask); + SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits; + return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond); + }; + + SDValue Lo, Hi; + if (IsConcat(N0, Lo, Hi)) + return MergeConcat(Lo, Hi); + + if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) { + SDValue Lo0, Lo1, Hi0, Hi1; + if (IsConcat(N0.getOperand(0), Lo0, Hi0) && + IsConcat(N0.getOperand(1), Lo1, Hi1)) { + return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1), + DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1)); + } + } + } } // If we have "setcc X, C0", check to see if we can shrink the immediate @@ -3891,20 +4032,20 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // TODO: Support this for vectors after legalize ops. if (!VT.isVector() || DCI.isBeforeLegalizeOps()) { // SETUGT X, SINTMAX -> SETLT X, 0 - if (Cond == ISD::SETUGT && - C1 == APInt::getSignedMaxValue(OperandBitSize)) + // SETUGE X, SINTMIN -> SETLT X, 0 + if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) || + (Cond == ISD::SETUGE && C1.isMinSignedValue())) return DAG.getSetCC(dl, VT, N0, DAG.getConstant(0, dl, N1.getValueType()), ISD::SETLT); // SETULT X, SINTMIN -> SETGT X, -1 - if (Cond == ISD::SETULT && - C1 == APInt::getSignedMinValue(OperandBitSize)) { - SDValue ConstMinusOne = - DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, - N1.getValueType()); - return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); - } + // SETULE X, SINTMAX -> SETGT X, -1 + if ((Cond == ISD::SETULT && C1.isMinSignedValue()) || + (Cond == ISD::SETULE && C1.isMaxSignedValue())) + return DAG.getSetCC(dl, VT, N0, + DAG.getAllOnesConstant(dl, N1.getValueType()), + ISD::SETGT); } } @@ -3915,8 +4056,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, const APInt &C1 = N1C->getAPIntValue(); EVT ShValTy = N0.getValueType(); - // Fold bit comparisons when we can. - if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + // Fold bit comparisons when we can. This will result in an + // incorrect value when boolean false is negative one, unless + // the bitsize is 1 in which case the false value is the same + // in practice regardless of the representation. + if ((VT.getSizeInBits() == 1 || + getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE) && (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) && N0.getOpcode() == ISD::AND) { if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { @@ -4312,8 +4458,8 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const { } SDValue TargetLowering::LowerAsmOutputForConstraint( - SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo, - SelectionDAG &DAG) const { + SDValue &Chain, SDValue &Flag, const SDLoc &DL, + const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { return SDValue(); } @@ -4887,9 +5033,15 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, return SDValue(); SDValue Shift, Factor; - if (VT.isVector()) { + if (VT.isFixedLengthVector()) { Shift = DAG.getBuildVector(ShVT, dl, Shifts); Factor = DAG.getBuildVector(VT, dl, Factors); + } else if (VT.isScalableVector()) { + assert(Shifts.size() == 1 && Factors.size() == 1 && + "Expected matchUnaryPredicate to return one element for scalable " + "vectors"); + Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]); + Factor = DAG.getSplatVector(VT, dl, Factors[0]); } else { Shift = Shifts[0]; Factor = Factors[0]; @@ -4982,11 +5134,20 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue MagicFactor, Factor, Shift, ShiftMask; - if (VT.isVector()) { + if (VT.isFixedLengthVector()) { MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); Factor = DAG.getBuildVector(VT, dl, Factors); Shift = DAG.getBuildVector(ShVT, dl, Shifts); ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks); + } else if (VT.isScalableVector()) { + assert(MagicFactors.size() == 1 && Factors.size() == 1 && + Shifts.size() == 1 && ShiftMasks.size() == 1 && + "Expected matchUnaryPredicate to return one element for scalable " + "vectors"); + MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]); + Factor = DAG.getSplatVector(VT, dl, Factors[0]); + Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]); + ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]); } else { MagicFactor = MagicFactors[0]; Factor = Factors[0]; @@ -5100,11 +5261,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return SDValue(); SDValue PreShift, PostShift, MagicFactor, NPQFactor; - if (VT.isVector()) { + if (VT.isFixedLengthVector()) { PreShift = DAG.getBuildVector(ShVT, dl, PreShifts); MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors); NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors); PostShift = DAG.getBuildVector(ShVT, dl, PostShifts); + } else if (VT.isScalableVector()) { + assert(PreShifts.size() == 1 && MagicFactors.size() == 1 && + NPQFactors.size() == 1 && PostShifts.size() == 1 && + "Expected matchUnaryPredicate to return one for scalable vectors"); + PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]); + MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]); + NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]); + PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]); } else { PreShift = PreShifts[0]; MagicFactor = MagicFactors[0]; @@ -5156,8 +5325,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); Created.push_back(Q.getNode()); + EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue One = DAG.getConstant(1, dl, VT); - SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ); + SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ); return DAG.getSelect(dl, VT, IsOne, N0, Q); } @@ -5584,7 +5755,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, return SDValue(); SDValue PVal, AVal, KVal, QVal; - if (VT.isVector()) { + if (VT.isFixedLengthVector()) { if (HadOneDivisor) { // Try to turn PAmts into a splat, since we don't care about the values // that are currently '0'. If we can't, just keep '0'`s. @@ -5603,6 +5774,15 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, AVal = DAG.getBuildVector(VT, DL, AAmts); KVal = DAG.getBuildVector(ShVT, DL, KAmts); QVal = DAG.getBuildVector(VT, DL, QAmts); + } else if (VT.isScalableVector()) { + assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 && + QAmts.size() == 1 && + "Expected matchUnaryPredicate to return one element for scalable " + "vectors"); + PVal = DAG.getSplatVector(VT, DL, PAmts[0]); + AVal = DAG.getSplatVector(VT, DL, AAmts[0]); + KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]); + QVal = DAG.getSplatVector(VT, DL, QAmts[0]); } else { PVal = PAmts[0]; AVal = AAmts[0]; @@ -5697,6 +5877,28 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { return false; } +SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG, + const DenormalMode &Mode) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue FPZero = DAG.getConstantFP(0.0, DL, VT); + // Testing it with denormal inputs to avoid wrong estimate. + if (Mode.Input == DenormalMode::IEEE) { + // This is specifically a check for the handling of denormal inputs, + // not the result. + + // Test = fabs(X) < SmallestNormal + const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT); + APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem); + SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT); + SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op); + return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT); + } + // Test = X == 0.0 + return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ); +} + SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, @@ -5941,7 +6143,7 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, // Legalization Utilities //===----------------------------------------------------------------------===// -bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, +bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, SelectionDAG &DAG, @@ -5964,8 +6166,6 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, unsigned OuterBitSize = VT.getScalarSizeInBits(); unsigned InnerBitSize = HiLoVT.getScalarSizeInBits(); - unsigned LHSSB = DAG.ComputeNumSignBits(LHS); - unsigned RHSSB = DAG.ComputeNumSignBits(RHS); // LL, LH, RL, and RH must be either all NULL or all set to a value. assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || @@ -6014,8 +6214,9 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, } } - if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize && - RHSSB > InnerBitSize) { + if (!VT.isVector() && Opcode == ISD::MUL && + DAG.ComputeNumSignBits(LHS) > InnerBitSize && + DAG.ComputeNumSignBits(RHS) > InnerBitSize) { // The input values are both sign-extended. // TODO non-MUL case? if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) { @@ -6129,7 +6330,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, SDValue LL, SDValue LH, SDValue RL, SDValue RH) const { SmallVector<SDValue, 2> Result; - bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N, + bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N), N->getOperand(0), N->getOperand(1), Result, HiLoVT, DAG, Kind, LL, LH, RL, RH); if (Ok) { @@ -6141,7 +6342,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, } // Check that (every element of) Z is undef or not an exact multiple of BW. -static bool isNonZeroModBitWidth(SDValue Z, unsigned BW) { +static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) { return ISD::matchUnaryPredicate( Z, [=](ConstantSDNode *C) { return !C || C->getAPIntValue().urem(BW) != 0; }, @@ -6168,9 +6369,35 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, EVT ShVT = Z.getValueType(); + // If a funnel shift in the other direction is more supported, use it. + unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL; + if (!isOperationLegalOrCustom(Node->getOpcode(), VT) && + isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) { + if (isNonZeroModBitWidthOrUndef(Z, BW)) { + // fshl X, Y, Z -> fshr X, Y, -Z + // fshr X, Y, Z -> fshl X, Y, -Z + SDValue Zero = DAG.getConstant(0, DL, ShVT); + Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z); + } else { + // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z + // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z + SDValue One = DAG.getConstant(1, DL, ShVT); + if (IsFSHL) { + Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One); + X = DAG.getNode(ISD::SRL, DL, VT, X, One); + } else { + X = DAG.getNode(RevOpcode, DL, VT, X, Y, One); + Y = DAG.getNode(ISD::SHL, DL, VT, Y, One); + } + Z = DAG.getNOT(DL, Z, ShVT); + } + Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z); + return true; + } + SDValue ShX, ShY; SDValue ShAmt, InvShAmt; - if (isNonZeroModBitWidth(Z, BW)) { + if (isNonZeroModBitWidthOrUndef(Z, BW)) { // fshl: X << C | Y >> (BW - C) // fshr: X << (BW - C) | Y >> C // where C = Z % BW is not zero @@ -6210,8 +6437,8 @@ bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result, } // TODO: Merge with expandFunnelShift. -bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps, + SDValue &Result, SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); unsigned EltSizeInBits = VT.getScalarSizeInBits(); bool IsLeft = Node->getOpcode() == ISD::ROTL; @@ -6222,36 +6449,47 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result, EVT ShVT = Op1.getValueType(); SDValue Zero = DAG.getConstant(0, DL, ShVT); - assert(isPowerOf2_32(EltSizeInBits) && EltSizeInBits > 1 && - "Expecting the type bitwidth to be a power of 2"); - // If a rotate in the other direction is supported, use it. unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL; - if (isOperationLegalOrCustom(RevRot, VT)) { + if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); Result = DAG.getNode(RevRot, DL, VT, Op0, Sub); return true; } - if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || - !isOperationLegalOrCustom(ISD::SRL, VT) || - !isOperationLegalOrCustom(ISD::SUB, VT) || - !isOperationLegalOrCustomOrPromote(ISD::OR, VT) || - !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) + if (!AllowVectorOps && VT.isVector() && + (!isOperationLegalOrCustom(ISD::SHL, VT) || + !isOperationLegalOrCustom(ISD::SRL, VT) || + !isOperationLegalOrCustom(ISD::SUB, VT) || + !isOperationLegalOrCustomOrPromote(ISD::OR, VT) || + !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) return false; - // Otherwise, - // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and -c, w-1))) - // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and -c, w-1))) - // unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL; unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL; SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT); - SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); - SDValue And0 = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC); - SDValue And1 = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC); - Result = DAG.getNode(ISD::OR, DL, VT, DAG.getNode(ShOpc, DL, VT, Op0, And0), - DAG.getNode(HsOpc, DL, VT, Op0, And1)); + SDValue ShVal; + SDValue HsVal; + if (isPowerOf2_32(EltSizeInBits)) { + // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1)) + // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1)) + SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1); + SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC); + ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt); + SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC); + HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt); + } else { + // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w)) + // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w)) + SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT); + SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC); + ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt); + SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt); + SDValue One = DAG.getConstant(1, DL, ShVT); + HsVal = + DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt); + } + Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal); return true; } @@ -6270,7 +6508,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, if (Node->isStrictFPOpcode()) // When a NaN is converted to an integer a trap is allowed. We can't // use this expansion here because it would eliminate that trap. Other - // traps are also allowed and cannot be eliminated. See + // traps are also allowed and cannot be eliminated. See // IEEE 754-2008 sec 5.8. return false; @@ -6341,7 +6579,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT); // Only expand vector types if we have the appropriate vector bit operations. - unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : + unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT : ISD::FP_TO_SINT; if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT))) @@ -6356,14 +6594,19 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { if (Node->isStrictFPOpcode()) { - Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, - { Node->getOperand(0), Src }); + Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + { Node->getOperand(0), Src }); Chain = Result.getValue(1); } else Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src); return true; } + // Don't expand it if there isn't cheap fsub instruction. + if (!isOperationLegalOrCustom( + Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT)) + return false; + SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT); SDValue Sel; @@ -6395,9 +6638,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, DAG.getConstant(SignMask, dl, DstVT)); SDValue SInt; if (Node->isStrictFPOpcode()) { - SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, + SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other }, { Chain, Src, FltOfs }); - SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, + SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other }, { Val.getValue(1), Val }); Chain = SInt.getValue(1); } else { @@ -6426,8 +6669,13 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const { - unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0; - SDValue Src = Node->getOperand(OpNo); + // This transform is not correct for converting 0 when rounding mode is set + // to round toward negative infinity which will produce -0.0. So disable under + // strictfp. + if (Node->isStrictFPOpcode()) + return false; + + SDValue Src = Node->getOperand(0); EVT SrcVT = Src.getValueType(); EVT DstVT = Node->getValueType(0); @@ -6446,9 +6694,10 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout()); // Implementation of unsigned i64 to f64 following the algorithm in - // __floatundidf in compiler_rt. This implementation has the advantage - // of performing rounding correctly, both in the default rounding mode - // and in all alternate rounding modes. + // __floatundidf in compiler_rt. This implementation performs rounding + // correctly in all rounding modes with the exception of converting 0 + // when rounding toward negative infinity. In that case the fsub will produce + // -0.0. This will be added to +0.0 and produce -0.0 which is incorrect. SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT); SDValue TwoP84PlusTwoP52 = DAG.getConstantFP( BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT); @@ -6462,18 +6711,9 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result, SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84); SDValue LoFlt = DAG.getBitcast(DstVT, LoOr); SDValue HiFlt = DAG.getBitcast(DstVT, HiOr); - if (Node->isStrictFPOpcode()) { - SDValue HiSub = - DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other}, - {Node->getOperand(0), HiFlt, TwoP84PlusTwoP52}); - Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other}, - {HiSub.getValue(1), LoFlt, HiSub}); - Chain = Result.getValue(1); - } else { - SDValue HiSub = - DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); - Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); - } + SDValue HiSub = + DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52); + Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub); return true; } @@ -6483,6 +6723,11 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE; EVT VT = Node->getValueType(0); + + if (VT.isScalableVector()) + report_fatal_error( + "Expanding fminnum/fmaxnum for scalable vectors is undefined."); + if (isOperationLegalOrCustom(NewOp, VT)) { SDValue Quiet0 = Node->getOperand(0); SDValue Quiet1 = Node->getOperand(1); @@ -6706,23 +6951,58 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, } bool TargetLowering::expandABS(SDNode *N, SDValue &Result, - SelectionDAG &DAG) const { + SelectionDAG &DAG, bool IsNegative) const { SDLoc dl(N); EVT VT = N->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); SDValue Op = N->getOperand(0); + // abs(x) -> smax(x,sub(0,x)) + if (!IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::SMAX, VT)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + Result = DAG.getNode(ISD::SMAX, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); + return true; + } + + // abs(x) -> umin(x,sub(0,x)) + if (!IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::UMIN, VT)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + Result = DAG.getNode(ISD::UMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); + return true; + } + + // 0 - abs(x) -> smin(x, sub(0,x)) + if (IsNegative && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::SMIN, VT)) { + SDValue Zero = DAG.getConstant(0, dl, VT); + Result = DAG.getNode(ISD::SMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); + return true; + } + // Only expand vector types if we have the appropriate vector operations. - if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SRA, VT) || - !isOperationLegalOrCustom(ISD::ADD, VT) || - !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) + if (VT.isVector() && + (!isOperationLegalOrCustom(ISD::SRA, VT) || + (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) || + (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) || + !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return false; SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); + if (!IsNegative) { + SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); + Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); + } else { + // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); + } return true; } @@ -6736,6 +7016,9 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, EVT DstVT = LD->getValueType(0); ISD::LoadExtType ExtType = LD->getExtensionType(); + if (SrcVT.isScalableVector()) + report_fatal_error("Cannot scalarize scalable vector loads"); + unsigned NumElem = SrcVT.getVectorNumElements(); EVT SrcEltVT = SrcVT.getScalarType(); @@ -6762,7 +7045,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, // the codegen worse. SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR, - LD->getPointerInfo(), SrcIntVT, LD->getAlignment(), + LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); SmallVector<SDValue, 8> Vals; @@ -6799,10 +7082,10 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SDValue ScalarLoad = DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride), + SrcEltVT, LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); - BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride); + BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride)); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -6823,6 +7106,9 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SDValue Value = ST->getValue(); EVT StVT = ST->getMemoryVT(); + if (StVT.isScalableVector()) + report_fatal_error("Cannot scalarize scalable vector stores"); + // The type of the data we want to save EVT RegVT = Value.getValueType(); EVT RegSclVT = RegVT.getScalarType(); @@ -6859,7 +7145,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, } return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(), - ST->getAlignment(), ST->getMemOperand()->getFlags(), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo()); } @@ -6873,13 +7159,14 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST, SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value, DAG.getVectorIdxConstant(Idx, SL)); - SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride); + SDValue Ptr = + DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride)); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore( Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride), - MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); + MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), + ST->getAAInfo()); Stores.push_back(Store); } @@ -6944,7 +7231,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { // Load one integer register's worth from the original location. SDValue Load = DAG.getLoad( RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), - MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(), + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore( @@ -6963,8 +7250,8 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), MemVT, - MinAlign(LD->getAlignment(), Offset), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + LD->getOriginalAlign(), LD->getMemOperand()->getFlags(), + LD->getAAInfo()); // Follow the load with a store to the stack slot. Remember the store. // On big-endian machines this requires a truncating store to ensure // that the bits end up in the right place. @@ -6994,7 +7281,7 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); NumBits >>= 1; - unsigned Alignment = LD->getAlignment(); + Align Alignment = LD->getOriginalAlign(); unsigned IncrementSize = NumBits / 8; ISD::LoadExtType HiExtType = LD->getExtensionType(); @@ -7009,21 +7296,21 @@ TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const { NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); } else { Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), LD->getAAInfo()); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - NewLoadedVT, MinAlign(Alignment, IncrementSize), - LD->getMemOperand()->getFlags(), LD->getAAInfo()); + NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(), + LD->getAAInfo()); } // aggregate the two parts @@ -7047,7 +7334,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); EVT VT = Val.getValueType(); - int Alignment = ST->getAlignment(); + Align Alignment = ST->getOriginalAlign(); auto &MF = DAG.getMachineFunction(); EVT StoreMemVT = ST->getMemoryVT(); @@ -7104,7 +7391,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), - MinAlign(ST->getAlignment(), Offset), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags())); // Increment the pointers. Offset += RegBytes; @@ -7126,7 +7413,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, Stores.push_back( DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), LoadMemVT, - MinAlign(ST->getAlignment(), Offset), + ST->getOriginalAlign(), ST->getMemOperand()->getFlags(), ST->getAAInfo())); // The order of the stores doesn't matter - say it with a TokenFactor. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); @@ -7137,8 +7424,8 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, "Unaligned store of unknown type."); // Get the half-size VT EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext()); - int NumBits = NewStoredVT.getSizeInBits(); - int IncrementSize = NumBits / 8; + unsigned NumBits = NewStoredVT.getFixedSizeInBits(); + unsigned IncrementSize = NumBits / 8; // Divide the stored value in two parts. SDValue ShiftAmount = DAG.getConstant( @@ -7153,8 +7440,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST, Ptr, ST->getPointerInfo(), NewStoredVT, Alignment, ST->getMemOperand()->getFlags()); - Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize); - Alignment = MinAlign(Alignment, IncrementSize); + Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize)); Store2 = DAG.getTruncStore( Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr, ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment, @@ -7173,9 +7459,12 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, SDValue Increment; EVT AddrVT = Addr.getValueType(); EVT MaskVT = Mask.getValueType(); - assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() && + assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() && "Incompatible types of Data and Mask"); if (IsCompressedMemory) { + if (DataVT.isScalableVector()) + report_fatal_error( + "Cannot currently handle compressed memory with scalable vectors"); // Incrementing the pointer according to number of '1's in the mask. EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits()); SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask); @@ -7191,6 +7480,10 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL, AddrVT); Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale); + } else if (DataVT.isScalableVector()) { + Increment = DAG.getVScale(DL, AddrVT, + APInt(AddrVT.getFixedSizeInBits(), + DataVT.getStoreSize().getKnownMinSize())); } else Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); @@ -7201,16 +7494,26 @@ static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl) { - if (isa<ConstantSDNode>(Idx)) + if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx)) return Idx; EVT IdxVT = Idx.getValueType(); - unsigned NElts = VecVT.getVectorNumElements(); - if (isPowerOf2_32(NElts)) { - APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), - Log2_32(NElts)); - return DAG.getNode(ISD::AND, dl, IdxVT, Idx, - DAG.getConstant(Imm, dl, IdxVT)); + unsigned NElts = VecVT.getVectorMinNumElements(); + if (VecVT.isScalableVector()) { + SDValue VS = DAG.getVScale(dl, IdxVT, + APInt(IdxVT.getFixedSizeInBits(), + NElts)); + SDValue Sub = DAG.getNode(ISD::SUB, dl, IdxVT, VS, + DAG.getConstant(1, dl, IdxVT)); + + return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub); + } else { + if (isPowerOf2_32(NElts)) { + APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), + Log2_32(NElts)); + return DAG.getNode(ISD::AND, dl, IdxVT, Idx, + DAG.getConstant(Imm, dl, IdxVT)); + } } return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, @@ -7227,8 +7530,8 @@ SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG, EVT EltVT = VecVT.getVectorElementType(); // Calculate the element offset and add it to the pointer. - unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. - assert(EltSize * 8 == EltVT.getSizeInBits() && + unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl); @@ -7306,6 +7609,65 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, return SDValue(); } +// Convert redundant addressing modes (e.g. scaling is redundant +// when accessing bytes). +ISD::MemIndexType +TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT, + SDValue Offsets) const { + bool IsScaledIndex = + (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED); + bool IsSignedIndex = + (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED); + + // Scaling is unimportant for bytes, canonicalize to unscaled. + if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) { + IsScaledIndex = false; + IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; + } + + return IndexType; +} + +SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const { + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + EVT VT = Op0.getValueType(); + unsigned Opcode = Node->getOpcode(); + SDLoc DL(Node); + + // umin(x,y) -> sub(x,usubsat(x,y)) + if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) && + isOperationLegal(ISD::USUBSAT, VT)) { + return DAG.getNode(ISD::SUB, DL, VT, Op0, + DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1)); + } + + // umax(x,y) -> add(x,usubsat(y,x)) + if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) && + isOperationLegal(ISD::USUBSAT, VT)) { + return DAG.getNode(ISD::ADD, DL, VT, Op0, + DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0)); + } + + // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B + ISD::CondCode CC; + switch (Opcode) { + default: llvm_unreachable("How did we get here?"); + case ISD::SMAX: CC = ISD::SETGT; break; + case ISD::SMIN: CC = ISD::SETLT; break; + case ISD::UMAX: CC = ISD::SETUGT; break; + case ISD::UMIN: CC = ISD::SETULT; break; + } + + // FIXME: Should really try to split the vector in case it's legal on a + // subvector. + if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(Node); + + SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC); + return DAG.getSelect(DL, VT, Cond, Op0, Op1); +} + SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { unsigned Opcode = Node->getOpcode(); SDValue LHS = Node->getOperand(0); @@ -7317,12 +7679,13 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { assert(VT.isInteger() && "Expected operands to be integers"); // usub.sat(a, b) -> umax(a, b) - b - if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) { + if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) { SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS); return DAG.getNode(ISD::SUB, dl, VT, Max, RHS); } - if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) { + // uadd.sat(a, b) -> umin(a, ~b) + b + if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) { SDValue InvRHS = DAG.getNOT(dl, RHS, VT); SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS); return DAG.getNode(ISD::ADD, dl, VT, Min, RHS); @@ -7347,6 +7710,11 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { "addition or subtraction node."); } + // FIXME: Should really try to split the vector in case it's legal on a + // subvector. + if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(Node); + unsigned BitWidth = LHS.getScalarValueSizeInBits(); EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), @@ -7386,6 +7754,41 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { } } +SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { + unsigned Opcode = Node->getOpcode(); + bool IsSigned = Opcode == ISD::SSHLSAT; + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + EVT VT = LHS.getValueType(); + SDLoc dl(Node); + + assert((Node->getOpcode() == ISD::SSHLSAT || + Node->getOpcode() == ISD::USHLSAT) && + "Expected a SHLSAT opcode"); + assert(VT == RHS.getValueType() && "Expected operands to be the same type"); + assert(VT.isInteger() && "Expected operands to be integers"); + + // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate. + + unsigned BW = VT.getScalarSizeInBits(); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS); + SDValue Orig = + DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS); + + SDValue SatVal; + if (IsSigned) { + SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT); + SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT); + SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT), + SatMin, SatMax, ISD::SETLT); + } else { + SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT); + } + Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE); + + return Result; +} + SDValue TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { assert((Node->getOpcode() == ISD::SMULFIX || @@ -7759,7 +8162,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, if (isSigned) { // The high part is obtained by SRA'ing all but one of the bits of low // part. - unsigned LoSize = VT.getSizeInBits(); + unsigned LoSize = VT.getFixedSizeInBits(); HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, DAG.getConstant(LoSize - 1, dl, @@ -7818,7 +8221,7 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, // Truncate the result if SetCC returns a larger type than needed. EVT RType = Node->getValueType(1); - if (RType.getSizeInBits() < Overflow.getValueSizeInBits()) + if (RType.bitsLT(Overflow.getValueType())) Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow); assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() && @@ -7828,32 +8231,14 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); - bool NoNaN = Node->getFlags().hasNoNaNs(); - unsigned BaseOpcode = 0; - switch (Node->getOpcode()) { - default: llvm_unreachable("Expected VECREDUCE opcode"); - case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break; - case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break; - case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break; - case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break; - case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break; - case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break; - case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break; - case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break; - case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break; - case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break; - case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break; - case ISD::VECREDUCE_FMAX: - BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM; - break; - case ISD::VECREDUCE_FMIN: - BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM; - break; - } - + unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode()); SDValue Op = Node->getOperand(0); EVT VT = Op.getValueType(); + if (VT.isScalableVector()) + report_fatal_error( + "Expanding reductions for scalable vectors is undefined."); + // Try to use a shuffle reduction for power of two vectors. if (VT.isPow2VectorType()) { while (VT.getVectorNumElements() > 1) { @@ -7884,6 +8269,33 @@ SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const { return Res; } +SDValue TargetLowering::expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const { + SDLoc dl(Node); + SDValue AccOp = Node->getOperand(0); + SDValue VecOp = Node->getOperand(1); + SDNodeFlags Flags = Node->getFlags(); + + EVT VT = VecOp.getValueType(); + EVT EltVT = VT.getVectorElementType(); + + if (VT.isScalableVector()) + report_fatal_error( + "Expanding reductions for scalable vectors is undefined."); + + unsigned NumElts = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Ops; + DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts); + + unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode()); + + SDValue Res = AccOp; + for (unsigned i = 0; i < NumElts; i++) + Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags); + + return Res; +} + bool TargetLowering::expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const { EVT VT = Node->getValueType(0); @@ -7906,3 +8318,105 @@ bool TargetLowering::expandREM(SDNode *Node, SDValue &Result, } return false; } + +SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node, + SelectionDAG &DAG) const { + bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT; + SDLoc dl(SDValue(Node, 0)); + SDValue Src = Node->getOperand(0); + + // DstVT is the result type, while SatVT is the size to which we saturate + EVT SrcVT = Src.getValueType(); + EVT DstVT = Node->getValueType(0); + + unsigned SatWidth = Node->getConstantOperandVal(1); + unsigned DstWidth = DstVT.getScalarSizeInBits(); + assert(SatWidth <= DstWidth && + "Expected saturation width smaller than result width"); + + // Determine minimum and maximum integer values and their corresponding + // floating-point values. + APInt MinInt, MaxInt; + if (IsSigned) { + MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth); + MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth); + } else { + MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth); + MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth); + } + + // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as + // libcall emission cannot handle this. Large result types will fail. + if (SrcVT == MVT::f16) { + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src); + SrcVT = Src.getValueType(); + } + + APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT)); + APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT)); + + APFloat::opStatus MinStatus = + MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero); + APFloat::opStatus MaxStatus = + MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero); + bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) && + !(MaxStatus & APFloat::opStatus::opInexact); + + SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT); + SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT); + + // If the integer bounds are exactly representable as floats and min/max are + // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence + // of comparisons and selects. + bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) && + isOperationLegal(ISD::FMAXNUM, SrcVT); + if (AreExactFloatBounds && MinMaxLegal) { + SDValue Clamped = Src; + + // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat. + Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode); + // Clamp by MaxFloat from above. NaN cannot occur. + Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode); + // Convert clamped value to integer. + SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, + dl, DstVT, Clamped); + + // In the unsigned case we're done, because we mapped NaN to MinFloat, + // which will cast to zero. + if (!IsSigned) + return FpToInt; + + // Otherwise, select 0 if Src is NaN. + SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); + return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt, + ISD::CondCode::SETUO); + } + + SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT); + SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT); + + // Result of direct conversion. The assumption here is that the operation is + // non-trapping and it's fine to apply it to an out-of-range value if we + // select it away later. + SDValue FpToInt = + DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src); + + SDValue Select = FpToInt; + + // If Src ULT MinFloat, select MinInt. In particular, this also selects + // MinInt if Src is NaN. + Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select, + ISD::CondCode::SETULT); + // If Src OGT MaxFloat, select MaxInt. + Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select, + ISD::CondCode::SETOGT); + + // In the unsigned case we are done, because we mapped NaN to MinInt, which + // is already zero. + if (!IsSigned) + return Select; + + // Otherwise, select 0 if Src is NaN. + SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); + return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO); +} |