diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2021-11-19 20:06:13 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-11-19 20:06:13 +0000 |
| commit | c0981da47d5696fe36474fcf86b4ce03ae3ff818 (patch) | |
| tree | f42add1021b9f2ac6a69ac7cf6c4499962739a45 /llvm/lib/CodeGen/SelectionDAG | |
| parent | 344a3780b2e33f6ca763666c380202b18aab72a3 (diff) | |
Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
22 files changed, 2850 insertions, 1568 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b104e995019f..ce400ea43f29 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -129,12 +129,12 @@ static cl::opt<unsigned> StoreMergeDependenceLimit( static cl::opt<bool> EnableReduceLoadOpStoreWidth( "combiner-reduce-load-op-store-width", cl::Hidden, cl::init(true), - cl::desc("DAG cominber enable reducing the width of load/op/store " + cl::desc("DAG combiner enable reducing the width of load/op/store " "sequence")); static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( "combiner-shrink-load-replace-store-with-store", cl::Hidden, cl::init(true), - cl::desc("DAG cominber enable load/<replace bytes>/store with " + cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store")); namespace { @@ -319,7 +319,7 @@ namespace { /// If so, return true. bool SimplifyDemandedBits(SDValue Op) { unsigned BitWidth = Op.getScalarValueSizeInBits(); - APInt DemandedBits = APInt::getAllOnesValue(BitWidth); + APInt DemandedBits = APInt::getAllOnes(BitWidth); return SimplifyDemandedBits(Op, DemandedBits); } @@ -345,7 +345,7 @@ namespace { return false; unsigned NumElts = Op.getValueType().getVectorNumElements(); - APInt DemandedElts = APInt::getAllOnesValue(NumElts); + APInt DemandedElts = APInt::getAllOnes(NumElts); return SimplifyDemandedVectorElts(Op, DemandedElts); } @@ -436,7 +436,7 @@ namespace { SDValue visitOR(SDNode *N); SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitXOR(SDNode *N); - SDValue SimplifyVBinOp(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); SDValue visitSRL(SDNode *N); @@ -515,6 +515,7 @@ namespace { SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitVECREDUCE(SDNode *N); + SDValue visitVPOp(SDNode *N); SDValue 
visitFADDForFMACombine(SDNode *N); SDValue visitFSUBForFMACombine(SDNode *N); @@ -615,7 +616,7 @@ namespace { SmallVectorImpl<SDValue> &Aliases); /// Return true if there is any possibility that the two addresses overlap. - bool isAlias(SDNode *Op0, SDNode *Op1) const; + bool mayAlias(SDNode *Op0, SDNode *Op1) const; /// Walk up chain skipping non-aliasing memory nodes, looking for a better /// chain (aliasing node.) @@ -1062,21 +1063,22 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, if (N0.getOpcode() != Opc) return SDValue(); - if (DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { - if (DAG.isConstantIntBuildVectorOrConstantInt(N1)) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) - if (SDValue OpNode = - DAG.FoldConstantArithmetic(Opc, DL, VT, {N0.getOperand(1), N1})) - return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) + return DAG.getNode(Opc, DL, VT, N00, OpNode); return SDValue(); } if (N0.hasOneUse()) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use - SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); - if (!OpNode.getNode()) - return SDValue(); - return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1)) + return DAG.getNode(Opc, DL, VT, OpNode, N01); + return SDValue(); } } return SDValue(); @@ -1738,6 +1740,9 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N); +#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...) 
case ISD::SDOPC: +#include "llvm/IR/VPIntrinsics.def" + return visitVPOp(N); } return SDValue(); } @@ -2257,7 +2262,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (add x, 0) -> x, vector edition @@ -2439,9 +2444,7 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) { N0.getOperand(0)); // fold (add (add (xor a, -1), b), 1) -> (sub b, a) - if (N0.getOpcode() == ISD::ADD || - N0.getOpcode() == ISD::UADDO || - N0.getOpcode() == ISD::SADDO) { + if (N0.getOpcode() == ISD::ADD) { SDValue A, Xor; if (isBitwiseNot(N0.getOperand(0))) { @@ -2783,7 +2786,7 @@ static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG, IsFlip = Const->isOne(); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: - IsFlip = Const->isAllOnesValue(); + IsFlip = Const->isAllOnes(); break; case TargetLowering::UndefinedBooleanContent: IsFlip = (Const->getAPIntValue() & 0x01) == 1; @@ -3259,7 +3262,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (sub x, 0) -> x, vector edition @@ -3317,11 +3320,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // Convert 0 - abs(x). 
- SDValue Result; if (N1->getOpcode() == ISD::ABS && - !TLI.isOperationLegalOrCustom(ISD::ABS, VT) && - TLI.expandABS(N1.getNode(), Result, DAG, true)) - return Result; + !TLI.isOperationLegalOrCustom(ISD::ABS, VT)) + if (SDValue Result = TLI.expandABS(N1.getNode(), DAG, true)) + return Result; // Fold neg(splat(neg(x)) -> splat(x) if (VT.isVector()) { @@ -3785,7 +3787,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); @@ -3810,18 +3812,18 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); // fold (mul x, 0) -> 0 - if (N1IsConst && ConstValue1.isNullValue()) + if (N1IsConst && ConstValue1.isZero()) return N1; // fold (mul x, 1) -> x - if (N1IsConst && ConstValue1.isOneValue()) + if (N1IsConst && ConstValue1.isOne()) return N0; if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; // fold (mul x, -1) -> 0-x - if (N1IsConst && ConstValue1.isAllOnesValue()) { + if (N1IsConst && ConstValue1.isAllOnes()) { SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); @@ -3839,7 +3841,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) { + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isNegatedPowerOf2()) { unsigned Log2Val = (-ConstValue1).logBase2(); SDLoc DL(N); // FIXME: If the input is something that is easily negated (e.g. 
a @@ -3968,7 +3970,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { SmallBitVector ClearMask; ClearMask.reserve(NumElts); auto IsClearMask = [&ClearMask](ConstantSDNode *V) { - if (!V || V->isNullValue()) { + if (!V || V->isZero()) { ClearMask.push_back(true); return true; } @@ -4054,9 +4056,7 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) { SDValue Op0 = Node->getOperand(0); SDValue Op1 = Node->getOperand(1); SDValue combined; - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Op0.getNode()->uses()) { if (User == Node || User->getOpcode() == ISD::DELETED_NODE || User->use_empty()) continue; @@ -4113,7 +4113,7 @@ static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) { // 0 / X -> 0 // 0 % X -> 0 ConstantSDNode *N0C = isConstOrConstSplat(N0); - if (N0C && N0C->isNullValue()) + if (N0C && N0C->isZero()) return N0; // X / X -> 1 @@ -4138,21 +4138,20 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - SDLoc DL(N); - // fold (sdiv c1, c2) -> c1/c2 ConstantSDNode *N1C = isConstOrConstSplat(N1); if (SDValue C = DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, {N0, N1})) return C; // fold (sdiv X, -1) -> 0-X - if (N1C && N1C->isAllOnesValue()) + if (N1C && N1C->isAllOnes()) return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) @@ -4206,11 +4205,11 @@ SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) { // Helper for determining whether a value is a power-2 constant scalar or a // vector of such elements. 
auto IsPowerOfTwo = [](ConstantSDNode *C) { - if (C->isNullValue() || C->isOpaque()) + if (C->isZero() || C->isOpaque()) return false; if (C->getAPIntValue().isPowerOf2()) return true; - if ((-C->getAPIntValue()).isPowerOf2()) + if (C->getAPIntValue().isNegatedPowerOf2()) return true; return false; }; @@ -4283,21 +4282,20 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); EVT CCVT = getSetCCResultType(VT); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; - SDLoc DL(N); - // fold (udiv c1, c2) -> c1/c2 ConstantSDNode *N1C = isConstOrConstSplat(N1); if (SDValue C = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, {N0, N1})) return C; // fold (udiv X, -1) -> select(X == -1, 1, 0) - if (N1C && N1C->getAPIntValue().isAllOnesValue()) + if (N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT)); @@ -4393,7 +4391,7 @@ SDValue DAGCombiner::visitREM(SDNode *N) { return C; // fold (urem X, -1) -> select(X == -1, 0, x) - if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue()) + if (!isSigned && N1C && N1C->isAllOnes()) return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(0, DL, VT), N0); @@ -4477,6 +4475,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHS, DL, VT, {N0, N1})) return C; + // canonicalize constant to RHS. 
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHS, DL, N->getVTList(), N1, N0); + // fold (mulhs x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4529,6 +4532,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { if (SDValue C = DAG.FoldConstantArithmetic(ISD::MULHU, DL, VT, {N0, N1})) return C; + // canonicalize constant to RHS. + if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && + !DAG.isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MULHU, DL, N->getVTList(), N1, N0); + // fold (mulhu x, 0) -> 0 if (isNullConstant(N1)) return N1; @@ -4569,6 +4577,12 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { } } + // Simplify the operands using demanded-bits information. + // We don't have demanded bits support for MULHU so this just enables constant + // folding based on known bits. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -4770,20 +4784,21 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); unsigned Opcode = N->getOpcode(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold operation with constant operands. - if (SDValue C = DAG.FoldConstantArithmetic(Opcode, SDLoc(N), VT, {N0, N1})) + if (SDValue C = DAG.FoldConstantArithmetic(Opcode, DL, VT, {N0, N1})) return C; // canonicalize constant to RHS if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && !DAG.isConstantIntBuildVectorOrConstantInt(N1)) - return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + return DAG.getNode(N->getOpcode(), DL, VT, N1, N0); // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX. // Only do this if the current op isn't legal and the flipped is. 
@@ -4799,7 +4814,7 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) { default: llvm_unreachable("Unknown MINMAX opcode"); } if (TLI.isOperationLegal(AltOpcode, VT)) - return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1); + return DAG.getNode(AltOpcode, DL, VT, N0, N1); } // Simplify the operands using demanded-bits information. @@ -5135,8 +5150,9 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) { if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL)) return V; + // TODO: Rewrite this to return a new 'AND' instead of using CombineTo. if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && - VT.getSizeInBits() <= 64) { + VT.getSizeInBits() <= 64 && N0->hasOneUse()) { if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal @@ -5608,6 +5624,39 @@ static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) { return DAG.getZExtOrTrunc(Setcc, DL, VT); } +/// For targets that support usubsat, match a bit-hack form of that operation +/// that ends in 'and' and convert it. +static SDValue foldAndToUsubsat(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N1.getValueType(); + + // Canonicalize SRA as operand 1. + if (N0.getOpcode() == ISD::SRA) + std::swap(N0, N1); + + // xor/add with SMIN (signmask) are logically equivalent. 
+ if (N0.getOpcode() != ISD::XOR && N0.getOpcode() != ISD::ADD) + return SDValue(); + + if (N1.getOpcode() != ISD::SRA || !N0.hasOneUse() || !N1.hasOneUse() || + N0.getOperand(0) != N1.getOperand(0)) + return SDValue(); + + unsigned BitWidth = VT.getScalarSizeInBits(); + ConstantSDNode *XorC = isConstOrConstSplat(N0.getOperand(1), true); + ConstantSDNode *SraC = isConstOrConstSplat(N1.getOperand(1), true); + if (!XorC || !XorC->getAPIntValue().isSignMask() || + !SraC || SraC->getAPIntValue() != BitWidth - 1) + return SDValue(); + + // (i8 X ^ 128) & (i8 X s>> 7) --> usubsat X, 128 + // (i8 X + 128) & (i8 X s>> 7) --> usubsat X, 128 + SDLoc DL(N); + SDValue SignMask = DAG.getConstant(XorC->getAPIntValue(), DL, VT); + return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0), SignMask); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -5619,17 +5668,17 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (and x, 0) -> 0, vector edition if (ISD::isConstantSplatVectorAllZeros(N0.getNode())) // do not return N0, because undef node may exist in N0 - return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()), + return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()), SDLoc(N), N0.getValueType()); if (ISD::isConstantSplatVectorAllZeros(N1.getNode())) // do not return N1, because undef node may exist in N1 - return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()), + return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()), SDLoc(N), N1.getValueType()); // fold (and x, -1) -> x, vector edition @@ -5680,8 +5729,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // if (and x, c) is known to be zero, return 0 unsigned BitWidth = VT.getScalarSizeInBits(); - if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), - 
APInt::getAllOnesValue(BitWidth))) + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth))) return DAG.getConstant(0, SDLoc(N), VT); if (SDValue NewSel = foldBinOpIntoSelect(N)) @@ -5743,7 +5791,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Get the constant (if applicable) the zero'th operand is being ANDed with. // This can be a pure constant or a vector splat, in which case we treat the // vector as a scalar and use the splat value. - APInt Constant = APInt::getNullValue(1); + APInt Constant = APInt::getZero(1); if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { Constant = C->getAPIntValue(); } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { @@ -5774,7 +5822,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. if ((SplatBitSize % EltBitWidth) == 0) { - Constant = APInt::getAllOnesValue(EltBitWidth); + Constant = APInt::getAllOnes(EltBitWidth); for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i) Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth); } @@ -5801,7 +5849,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { case ISD::NON_EXTLOAD: B = true; break; } - if (B && Constant.isAllOnesValue()) { + if (B && Constant.isAllOnes()) { // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to // preserve semantics once we get rid of the AND. 
SDValue NewLoad(Load, 0); @@ -5971,6 +6019,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (IsAndZeroExtMask(N0, N1)) return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); + if (hasOperation(ISD::USUBSAT, VT)) + if (SDValue V = foldAndToUsubsat(N, DAG)) + return V; + return SDValue(); } @@ -6385,7 +6437,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; // fold (or x, 0) -> x, vector edition @@ -6926,17 +6978,16 @@ SDValue DAGCombiner::MatchFunnelPosNeg(SDValue N0, SDValue N1, SDValue Pos, // a rot[lr]. This also matches funnel shift patterns, similar to rotation but // with different shifted sources. SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { - // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); - if (!TLI.isTypeLegal(VT)) - return SDValue(); // The target must have at least one rotate/funnel flavor. + // We still try to match rotate by constant pre-legalization. + // TODO: Support pre-legalization funnel-shift by constant. bool HasROTL = hasOperation(ISD::ROTL, VT); bool HasROTR = hasOperation(ISD::ROTR, VT); bool HasFSHL = hasOperation(ISD::FSHL, VT); bool HasFSHR = hasOperation(ISD::FSHR, VT); - if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) + if (LegalOperations && !HasROTL && !HasROTR && !HasFSHL && !HasFSHR) return SDValue(); // Check for truncated rotate. @@ -6989,6 +7040,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { if (LHSShift.getOpcode() == RHSShift.getOpcode()) return SDValue(); // Shifts must disagree. + // TODO: Support pre-legalization funnel-shift by constant. bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); if (!IsRotate && !(HasFSHL || HasFSHR)) return SDValue(); // Requires funnel shift support. 
@@ -7017,12 +7069,15 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { }; if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { SDValue Res; - if (IsRotate && (HasROTL || HasROTR)) - Res = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt); - else - Res = DAG.getNode(HasFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, - RHSShiftArg, HasFSHL ? LHSShiftAmt : RHSShiftAmt); + if (IsRotate && (HasROTL || HasROTR || !(HasFSHL || HasFSHR))) { + bool UseROTL = !LegalOperations || HasROTL; + Res = DAG.getNode(UseROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + UseROTL ? LHSShiftAmt : RHSShiftAmt); + } else { + bool UseFSHL = !LegalOperations || HasFSHL; + Res = DAG.getNode(UseFSHL ? ISD::FSHL : ISD::FSHR, DL, VT, LHSShiftArg, + RHSShiftArg, UseFSHL ? LHSShiftAmt : RHSShiftAmt); + } // If there is an AND of either shifted operand, apply it to the result. if (LHSMask.getNode() || RHSMask.getNode()) { @@ -7046,6 +7101,11 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { return Res; } + // Even pre-legalization, we can't easily rotate/funnel-shift by a variable + // shift. + if (!HasROTL && !HasROTR && !HasFSHL && !HasFSHR) + return SDValue(); + // If there is a mask here, and we have a variable shift, we can't be sure // that we're masking out the right stuff. if (LHSMask.getNode() || RHSMask.getNode()) @@ -7297,7 +7357,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // TODO: If there is evidence that running this later would help, this // limitation could be removed. Legality checks may need to be added // for the created store and optional bswap/rotate. - if (LegalOperations) + if (LegalOperations || OptLevel == CodeGenOpt::None) return SDValue(); // We only handle merging simple stores of 1-4 bytes. 
@@ -7672,9 +7732,12 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // | D | // Into: // (x & m) | (y & ~m) -// If y is a constant, and the 'andn' does not work with immediates, -// we unfold into a different pattern: +// If y is a constant, m is not a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern: // ~(~x & m) & (m | y) +// If x is a constant, m is a 'not', and the 'andn' does not work with +// immediates, we unfold into a different pattern: +// (x | ~m) & ~(~m & ~y) // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at // the very least that breaks andnpd / andnps patterns, and because those // patterns are simplified in IR and shouldn't be created in the DAG @@ -7729,8 +7792,9 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { SDLoc DL(N); - // If Y is a constant, check that 'andn' works with immediates. - if (!TLI.hasAndNot(Y)) { + // If Y is a constant, check that 'andn' works with immediates. Unless M is + // a bitwise not that would already allow ANDN to be used. + if (!TLI.hasAndNot(Y) && !isBitwiseNot(M)) { assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable."); // If not, we need to do a bit more work to make sure andn is still used. SDValue NotX = DAG.getNOT(DL, X, VT); @@ -7740,6 +7804,19 @@ SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) { return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS); } + // If X is a constant and M is a bitwise not, check that 'andn' works with + // immediates. + if (!TLI.hasAndNot(X) && isBitwiseNot(M)) { + assert(TLI.hasAndNot(Y) && "Only mask is a variable? Unreachable."); + // If not, we need to do a bit more work to make sure andn is still used. 
+ SDValue NotM = M.getOperand(0); + SDValue LHS = DAG.getNode(ISD::OR, DL, VT, X, NotM); + SDValue NotY = DAG.getNOT(DL, Y, VT); + SDValue RHS = DAG.getNode(ISD::AND, DL, VT, NotM, NotY); + SDValue NotRHS = DAG.getNOT(DL, RHS, VT); + return DAG.getNode(ISD::AND, DL, VT, LHS, NotRHS); + } + SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M); SDValue NotM = DAG.getNOT(DL, M, VT); SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM); @@ -7751,10 +7828,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); EVT VT = N0.getValueType(); + SDLoc DL(N); // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (xor x, 0) -> x, vector edition @@ -7765,7 +7843,6 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). - SDLoc DL(N); if (N0.isUndef() && N1.isUndef()) return DAG.getConstant(0, DL, VT); @@ -7900,7 +7977,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // shift has been simplified to undef. uint64_t ShiftAmt = ShiftC->getLimitedValue(); if (ShiftAmt < BitWidth) { - APInt Ones = APInt::getAllOnesValue(BitWidth); + APInt Ones = APInt::getAllOnes(BitWidth); Ones = N0Opcode == ISD::SHL ? 
Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt); if (XorC->getAPIntValue() == Ones) { // If the xor constant is a shifted -1, do a 'not' before the shift: @@ -8223,7 +8300,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // fold vector ops if (VT.isVector()) { - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); @@ -8256,8 +8333,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return NewSel; // if (shl x, c) is known to be zero, return 0 - if (DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(OpSizeInBits))) + if (DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). @@ -8502,28 +8578,43 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, // Both operands must be equivalent extend nodes. SDValue LeftOp = ShiftOperand.getOperand(0); SDValue RightOp = ShiftOperand.getOperand(1); + bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND; bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND; - if ((!(IsSignExt || IsZeroExt)) || LeftOp.getOpcode() != RightOp.getOpcode()) + if (!IsSignExt && !IsZeroExt) return SDValue(); - EVT WideVT1 = LeftOp.getValueType(); - EVT WideVT2 = RightOp.getValueType(); - (void)WideVT2; + EVT NarrowVT = LeftOp.getOperand(0).getValueType(); + unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + + SDValue MulhRightOp; + if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { + unsigned ActiveBits = IsSignExt + ? 
Constant->getAPIntValue().getMinSignedBits() + : Constant->getAPIntValue().getActiveBits(); + if (ActiveBits > NarrowVTSize) + return SDValue(); + MulhRightOp = DAG.getConstant( + Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL, + NarrowVT); + } else { + if (LeftOp.getOpcode() != RightOp.getOpcode()) + return SDValue(); + // Check that the two extend nodes are the same type. + if (NarrowVT != RightOp.getOperand(0).getValueType()) + return SDValue(); + MulhRightOp = RightOp.getOperand(0); + } + + EVT WideVT = LeftOp.getValueType(); // Proceed with the transformation if the wide types match. - assert((WideVT1 == WideVT2) && + assert((WideVT == RightOp.getValueType()) && "Cannot have a multiply node with two different operand types."); - EVT NarrowVT = LeftOp.getOperand(0).getValueType(); - // Check that the two extend nodes are the same type. - if (NarrowVT != RightOp.getOperand(0).getValueType()) - return SDValue(); - // Proceed with the transformation if the wide type is twice as large // as the narrow type. - unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); - if (WideVT1.getScalarSizeInBits() != 2 * NarrowVTSize) + if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize) return SDValue(); // Check the shift amount with the narrow type size. @@ -8541,10 +8632,10 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, if (!TLI.isOperationLegalOrCustom(MulhOpcode, NarrowVT)) return SDValue(); - SDValue Result = DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), - RightOp.getOperand(0)); - return (N->getOpcode() == ISD::SRA ? DAG.getSExtOrTrunc(Result, DL, WideVT1) - : DAG.getZExtOrTrunc(Result, DL, WideVT1)); + SDValue Result = + DAG.getNode(MulhOpcode, DL, NarrowVT, LeftOp.getOperand(0), MulhRightOp); + return (N->getOpcode() == ISD::SRA ? 
DAG.getSExtOrTrunc(Result, DL, WideVT) + : DAG.getZExtOrTrunc(Result, DL, WideVT)); } SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -8564,7 +8655,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8762,7 +8853,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, SDLoc(N))) return FoldedVOp; ConstantSDNode *N1C = isConstOrConstSplat(N1); @@ -8775,8 +8866,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSel; // if (srl x, c) is known to be zero, return 0 - if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), - APInt::getAllOnesValue(OpSizeInBits))) + if (N1C && + DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(OpSizeInBits))) return DAG.getConstant(0, SDLoc(N), VT); // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) @@ -9358,27 +9449,27 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { // is also a target-independent combine here in DAGCombiner in the other // direction for (select Cond, -1, 0) when the condition is not i1. 
if (CondVT == MVT::i1 && !LegalOperations) { - if (C1->isNullValue() && C2->isOne()) { + if (C1->isZero() && C2->isOne()) { // select Cond, 0, 1 --> zext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); if (VT != MVT::i1) NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); return NotCond; } - if (C1->isNullValue() && C2->isAllOnesValue()) { + if (C1->isZero() && C2->isAllOnes()) { // select Cond, 0, -1 --> sext (!Cond) SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); if (VT != MVT::i1) NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); return NotCond; } - if (C1->isOne() && C2->isNullValue()) { + if (C1->isOne() && C2->isZero()) { // select Cond, 1, 0 --> zext (Cond) if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); return Cond; } - if (C1->isAllOnesValue() && C2->isNullValue()) { + if (C1->isAllOnes() && C2->isZero()) { // select Cond, -1, 0 --> sext (Cond) if (VT != MVT::i1) Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); @@ -9406,7 +9497,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { } // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) - if (C1Val.isPowerOf2() && C2Val.isNullValue()) { + if (C1Val.isPowerOf2() && C2Val.isZero()) { if (VT != MVT::i1) Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT); @@ -9434,7 +9525,7 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { TargetLowering::ZeroOrOneBooleanContent && TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == TargetLowering::ZeroOrOneBooleanContent && - C1->isNullValue() && C2->isOne()) { + C1->isZero() && C2->isOne()) { SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); if (VT.bitsEq(CondVT)) @@ -9479,6 +9570,64 @@ static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static SDValue foldVSelectToSignBitSplatMask(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + SDValue N1 = 
N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + if (N0.getOpcode() != ISD::SETCC || !N0.hasOneUse()) + return SDValue(); + + SDValue Cond0 = N0.getOperand(0); + SDValue Cond1 = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + if (VT != Cond0.getValueType()) + return SDValue(); + + // Match a signbit check of Cond0 as "Cond0 s<0". Swap select operands if the + // compare is inverted from that pattern ("Cond0 s> -1"). + if (CC == ISD::SETLT && isNullOrNullSplat(Cond1)) + ; // This is the pattern we are looking for. + else if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond1)) + std::swap(N1, N2); + else + return SDValue(); + + // (Cond0 s< 0) ? N1 : 0 --> (Cond0 s>> BW-1) & N1 + if (isNullOrNullSplat(N2)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + return DAG.getNode(ISD::AND, DL, VT, Sra, N1); + } + + // (Cond0 s< 0) ? -1 : N2 --> (Cond0 s>> BW-1) | N2 + if (isAllOnesOrAllOnesSplat(N1)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + return DAG.getNode(ISD::OR, DL, VT, Sra, N2); + } + + // If we have to invert the sign bit mask, only do that transform if the + // target has a bitwise 'and not' instruction (the invert is free). + // (Cond0 s< -0) ? 
0 : N2 --> ~(Cond0 s>> BW-1) & N2 + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (isNullOrNullSplat(N1) && TLI.hasAndNot(N1)) { + SDLoc DL(N); + SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT); + SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Cond0, ShiftAmt); + SDValue Not = DAG.getNOT(DL, Sra, VT); + return DAG.getNode(ISD::AND, DL, VT, Not, N2); + } + + // TODO: There's another pattern in this family, but it may require + // implementing hasOrNot() to check for profitability: + // (Cond0 s> -1) ? -1 : N2 --> ~(Cond0 s>> BW-1) | N2 + + return SDValue(); +} + SDValue DAGCombiner::visitSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -9703,8 +9852,8 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { "same value. This should have been addressed before this function."); return DAG.getNode( ISD::CONCAT_VECTORS, DL, VT, - BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), - TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); + BottomHalf->isZero() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isZero() ? RHS->getOperand(1) : LHS->getOperand(1)); } bool refineUniformBase(SDValue &BasePtr, SDValue &Index, SelectionDAG &DAG) { @@ -10169,6 +10318,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SDValue V = foldVSelectOfConstants(N)) return V; + if (hasOperation(ISD::SRA, VT)) + if (SDValue V = foldVSelectToSignBitSplatMask(N, DAG)) + return V; + return SDValue(); } @@ -10190,7 +10343,7 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { AddToWorklist(SCC.getNode()); if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { - if (!SCCC->isNullValue()) + if (!SCCC->isZero()) return N2; // cond always true -> true val else return N3; // cond always false -> false val @@ -10248,13 +10401,13 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // Is 'X Cond C' always true or false? 
auto IsAlwaysTrueOrFalse = [](ISD::CondCode Cond, ConstantSDNode *C) { - bool False = (Cond == ISD::SETULT && C->isNullValue()) || + bool False = (Cond == ISD::SETULT && C->isZero()) || (Cond == ISD::SETLT && C->isMinSignedValue()) || - (Cond == ISD::SETUGT && C->isAllOnesValue()) || + (Cond == ISD::SETUGT && C->isAllOnes()) || (Cond == ISD::SETGT && C->isMaxSignedValue()); - bool True = (Cond == ISD::SETULE && C->isAllOnesValue()) || + bool True = (Cond == ISD::SETULE && C->isAllOnes()) || (Cond == ISD::SETLE && C->isMaxSignedValue()) || - (Cond == ISD::SETUGE && C->isNullValue()) || + (Cond == ISD::SETUGE && C->isZero()) || (Cond == ISD::SETGE && C->isMinSignedValue()); return True || False; }; @@ -10863,7 +11016,7 @@ static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG, if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD) return SDValue(); - if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0))) + if (!TLI.isLoadExtLegalOrCustom(ExtLoadType, VT, Ld->getValueType(0))) return SDValue(); if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0))) @@ -11257,7 +11410,7 @@ static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, Known = DAG.computeKnownBits(Op); - return (Known.Zero | 1).isAllOnesValue(); + return (Known.Zero | 1).isAllOnes(); } /// Given an extending node with a pop-count operand, if the target does not @@ -12016,7 +12169,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); // If the input is already sign extended, just drop the extension. 
- if (DAG.ComputeNumSignBits(N0) >= (VTBits - ExtVTBits + 1)) + if (ExtVTBits >= DAG.ComputeMinSignedBits(N0)) return N0; // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 @@ -12032,8 +12185,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N00 = N0.getOperand(0); unsigned N00Bits = N00.getScalarValueSizeInBits(); - if ((N00Bits <= ExtVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00)) < ExtVTBits) && + if ((N00Bits <= ExtVTBits || DAG.ComputeMinSignedBits(N00) <= ExtVTBits) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00); } @@ -12052,8 +12204,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { APInt DemandedSrcElts = APInt::getLowBitsSet(SrcElts, DstElts); if ((N00Bits == ExtVTBits || (!IsZext && (N00Bits < ExtVTBits || - (N00Bits - DAG.ComputeNumSignBits(N00, DemandedSrcElts)) < - ExtVTBits))) && + DAG.ComputeMinSignedBits(N00) <= ExtVTBits))) && (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))) return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT, N00); @@ -12290,7 +12441,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue Amt = N0.getOperand(1); KnownBits Known = DAG.computeKnownBits(Amt); unsigned Size = VT.getScalarSizeInBits(); - if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) { + if (Known.countMaxActiveBits() <= Log2_32(Size)) { SDLoc SL(N); EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); @@ -12538,8 +12689,8 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); - LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); - LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + auto *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); + 
auto *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); // A BUILD_PAIR is always having the least significant part in elt 0 and the // most significant part in elt 1. So when combining into one large load, we @@ -12547,22 +12698,20 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (DAG.getDataLayout().isBigEndian()) std::swap(LD1, LD2); - if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !ISD::isNON_EXTLoad(LD2) || + !LD1->hasOneUse() || !LD2->hasOneUse() || LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); + + bool LD1Fast = false; EVT LD1VT = LD1->getValueType(0); unsigned LD1Bytes = LD1VT.getStoreSize(); - if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && - DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) { - Align Alignment = LD1->getAlign(); - Align NewAlign = DAG.getDataLayout().getABITypeAlign( - VT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign <= Alignment && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), - LD1->getPointerInfo(), Alignment); - } + if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1) && + TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, + *LD1->getMemOperand(), &LD1Fast) && LD1Fast) + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), + LD1->getPointerInfo(), LD1->getAlign()); return SDValue(); } @@ -12938,69 +13087,45 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); } - SDLoc DL(BV); - // Okay, we know the src/dst types are both integers of differing types. - // Handling growing first. 
assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); - if (SrcBitSize < DstBitSize) { - unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; - - SmallVector<SDValue, 8> Ops; - for (unsigned i = 0, e = BV->getNumOperands(); i != e; - i += NumInputsPerOutput) { - bool isLE = DAG.getDataLayout().isLittleEndian(); - APInt NewBits = APInt(DstBitSize, 0); - bool EltIsUndef = true; - for (unsigned j = 0; j != NumInputsPerOutput; ++j) { - // Shift the previously computed bits over. - NewBits <<= SrcBitSize; - SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); - if (Op.isUndef()) continue; - EltIsUndef = false; - NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). - zextOrTrunc(SrcBitSize).zext(DstBitSize); - } - - if (EltIsUndef) - Ops.push_back(DAG.getUNDEF(DstEltVT)); - else - Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); - } + // TODO: Should ConstantFoldBITCASTofBUILD_VECTOR always take a + // BuildVectorSDNode? + auto *BVN = cast<BuildVectorSDNode>(BV); - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getBuildVector(VT, DL, Ops); - } + // Extract the constant raw bit data. + BitVector UndefElements; + SmallVector<APInt> RawBits; + bool IsLE = DAG.getDataLayout().isLittleEndian(); + if (!BVN->getConstantRawBits(IsLE, DstBitSize, RawBits, UndefElements)) + return SDValue(); - // Finally, this must be the case where we are shrinking elements: each input - // turns into multiple outputs. 
- unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; - EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, - NumOutputsPerInput*BV->getNumOperands()); + SDLoc DL(BV); SmallVector<SDValue, 8> Ops; + for (unsigned I = 0, E = RawBits.size(); I != E; ++I) { + if (UndefElements[I]) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(RawBits[I], DL, DstEltVT)); + } - for (const SDValue &Op : BV->op_values()) { - if (Op.isUndef()) { - Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); - continue; - } - - APInt OpVal = cast<ConstantSDNode>(Op)-> - getAPIntValue().zextOrTrunc(SrcBitSize); + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); + return DAG.getBuildVector(VT, DL, Ops); +} - for (unsigned j = 0; j != NumOutputsPerInput; ++j) { - APInt ThisVal = OpVal.trunc(DstBitSize); - Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); - OpVal.lshrInPlace(DstBitSize); - } +// Returns true if floating point contraction is allowed on the FMUL-SDValue +// `N` +static bool isContractableFMUL(const TargetOptions &Options, SDValue N) { + assert(N.getOpcode() == ISD::FMUL); - // For big endian targets, swap the order of the pieces of each element. - if (DAG.getDataLayout().isBigEndian()) - std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); - } + return Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath || + N->getFlags().hasAllowContract(); +} - return DAG.getBuildVector(VT, DL, Ops); +// Returns true if `N` can assume no infinities involved in its computation. +static bool hasNoInfs(const TargetOptions &Options, SDValue N) { + return Options.NoInfsFPMath || N.getNode()->getFlags().hasNoInfs(); } /// Try to perform FMA combining on a given FADD node. @@ -13039,6 +13164,11 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { unsigned PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + auto isFusedOp = [&](SDValue N) { + unsigned Opcode = N.getOpcode(); + return Opcode == ISD::FMA || Opcode == ISD::FMAD; + }; + // Is the node an FMUL and contractable either due to global flags or // SDNodeFlags. auto isContractableFMUL = [AllowFusionGlobally](SDValue N) { @@ -13070,12 +13200,12 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) // This requires reassociation because it changes the order of operations. SDValue FMA, E; - if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && + if (CanReassociate && isFusedOp(N0) && N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && N0.getOperand(2).hasOneUse()) { FMA = N0; E = N1; - } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && + } else if (CanReassociate && isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && N1.getOperand(2).hasOneUse()) { FMA = N1; @@ -13131,7 +13261,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { DAG.getNode(ISD::FP_EXTEND, SL, VT, V), Z)); }; - if (N0.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N0)) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { SDValue N020 = N02.getOperand(0); @@ -13161,7 +13291,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { }; if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N00)) { SDValue N002 = N00.getOperand(2); if (isContractableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13175,7 +13305,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fold (fadd x, (fma y, z, (fpext (fmul u, v))) // -> (fma y, z, (fma (fpext u), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N1)) { SDValue N12 = N1.getOperand(2); if (N12.getOpcode() == 
ISD::FP_EXTEND) { SDValue N120 = N12.getOperand(0); @@ -13196,7 +13326,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // interesting for all targets, especially GPUs. if (N1.getOpcode() == ISD::FP_EXTEND) { SDValue N10 = N1.getOperand(0); - if (N10.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N10)) { SDValue N102 = N10.getOperand(2); if (isContractableFMUL(N102) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13392,12 +13522,17 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return isContractableFMUL(N) && isReassociable(N.getNode()); }; + auto isFusedOp = [&](SDValue N) { + unsigned Opcode = N.getOpcode(); + return Opcode == ISD::FMA || Opcode == ISD::FMAD; + }; + // More folding opportunities when target permits. if (Aggressive && isReassociable(N)) { bool CanFuse = Options.UnsafeFPMath || N->getFlags().hasAllowContract(); // fold (fsub (fma x, y, (fmul u, v)), z) // -> (fma x, y (fma u, v, (fneg z))) - if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && + if (CanFuse && isFusedOp(N0) && isContractableAndReassociableFMUL(N0.getOperand(2)) && N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) { return DAG.getNode(PreferredFusedOpcode, SL, VT, N0.getOperand(0), @@ -13410,7 +13545,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fmul u, v))) // -> (fma (fneg y), z, (fma (fneg u), v, x)) - if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && + if (CanFuse && isFusedOp(N1) && isContractableAndReassociableFMUL(N1.getOperand(2)) && N1->hasOneUse() && NoSignedZero) { SDValue N20 = N1.getOperand(2).getOperand(0); @@ -13424,8 +13559,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub (fma x, y, (fpext (fmul u, v))), z) // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) - if (N0.getOpcode() == PreferredFusedOpcode && - N0->hasOneUse()) { + if (isFusedOp(N0) && N0->hasOneUse()) { SDValue N02 = N0.getOperand(2); if (N02.getOpcode() == ISD::FP_EXTEND) { 
SDValue N020 = N02.getOperand(0); @@ -13451,7 +13585,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // interesting for all targets, especially GPUs. if (N0.getOpcode() == ISD::FP_EXTEND) { SDValue N00 = N0.getOperand(0); - if (N00.getOpcode() == PreferredFusedOpcode) { + if (isFusedOp(N00)) { SDValue N002 = N00.getOperand(2); if (isContractableAndReassociableFMUL(N002) && TLI.isFPExtFoldable(DAG, PreferredFusedOpcode, VT, @@ -13471,8 +13605,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) - if (N1.getOpcode() == PreferredFusedOpcode && - N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && + if (isFusedOp(N1) && N1.getOperand(2).getOpcode() == ISD::FP_EXTEND && N1->hasOneUse()) { SDValue N120 = N1.getOperand(2).getOperand(0); if (isContractableAndReassociableFMUL(N120) && @@ -13496,8 +13629,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { // FIXME: This turns two single-precision and one double-precision // operation into two double-precision operations, which might not be // interesting for all targets, especially GPUs. - if (N1.getOpcode() == ISD::FP_EXTEND && - N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + if (N1.getOpcode() == ISD::FP_EXTEND && isFusedOp(N1.getOperand(0))) { SDValue CvtSrc = N1.getOperand(0); SDValue N100 = CvtSrc.getOperand(0); SDValue N101 = CvtSrc.getOperand(1); @@ -13538,12 +13670,13 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { // The transforms below are incorrect when x == 0 and y == inf, because the // intermediate multiplication produces a nan. - if (!Options.NoInfsFPMath) + SDValue FAdd = N0.getOpcode() == ISD::FADD ? N0 : N1; + if (!hasNoInfs(Options, FAdd)) return SDValue(); // Floating-point multiply-add without intermediate rounding. 
bool HasFMA = - (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) && + isContractableFMUL(Options, SDValue(N, 0)) && TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) && (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); @@ -13633,7 +13766,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fadd c1, c2) -> c1 + c2 @@ -13841,7 +13974,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fsub c1, c2) -> c1-c2 @@ -13926,7 +14059,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold vector ops if (VT.isVector()) { // This just handles C1 * C2 for vectors. Other vector folds are below. - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; } @@ -13971,10 +14104,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, DL, VT, N0, N0); - // fold (fmul X, -1.0) -> (fneg X) - if (N1CFP && N1CFP->isExactlyValue(-1.0)) - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, DL, VT, N0); + // fold (fmul X, -1.0) -> (fsub -0.0, X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) { + return DAG.getNode(ISD::FSUB, DL, VT, + DAG.getConstantFP(-0.0, DL, VT), N0, Flags); + } + } // -N0 * -N1 --> N0 * N1 TargetLowering::NegatibleCost CostN0 = @@ -14260,7 +14396,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold vector ops if (VT.isVector()) - if (SDValue FoldedVOp = SimplifyVBinOp(N)) + if (SDValue FoldedVOp = SimplifyVBinOp(N, DL)) return FoldedVOp; // fold (fdiv c1, c2) -> c1/c2 @@ -16245,11 +16381,12 @@ 
struct LoadedSlice { return false; // Check if it will be merged with the load. - // 1. Check the alignment constraint. - Align RequiredAlignment = DAG->getDataLayout().getABITypeAlign( - ResVT.getTypeForEVT(*DAG->getContext())); - - if (RequiredAlignment > getAlign()) + // 1. Check the alignment / fast memory access constraint. + bool IsFast = false; + if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT, + Origin->getAddressSpace(), getAlign(), + Origin->getMemOperand()->getFlags(), &IsFast) || + !IsFast) return false; // 2. Check that the load is a legal operation for that type. @@ -16270,7 +16407,7 @@ struct LoadedSlice { /// \p UsedBits looks like 0..0 1..1 0..0. static bool areUsedBitsDense(const APInt &UsedBits) { // If all the bits are one, this is dense! - if (UsedBits.isAllOnesValue()) + if (UsedBits.isAllOnes()) return true; // Get rid of the unused bits on the right. @@ -16279,7 +16416,7 @@ static bool areUsedBitsDense(const APInt &UsedBits) { if (NarrowedUsedBits.countLeadingZeros()) NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); // Check that the chunk of bits is completely used. 
- return NarrowedUsedBits.isAllOnesValue(); + return NarrowedUsedBits.isAllOnes(); } /// Check whether or not \p First and \p Second are next to each other @@ -16697,8 +16834,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { unsigned BitWidth = N1.getValueSizeInBits(); APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); if (Opc == ISD::AND) - Imm ^= APInt::getAllOnesValue(BitWidth); - if (Imm == 0 || Imm.isAllOnesValue()) + Imm ^= APInt::getAllOnes(BitWidth); + if (Imm == 0 || Imm.isAllOnes()) return SDValue(); unsigned ShAmt = Imm.countTrailingZeros(); unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; @@ -16725,16 +16862,19 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if ((Imm & Mask) == Imm) { APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); if (Opc == ISD::AND) - NewImm ^= APInt::getAllOnesValue(NewBW); + NewImm ^= APInt::getAllOnes(NewBW); uint64_t PtrOff = ShAmt / 8; // For big endian targets, we need to adjust the offset to the pointer to // load the correct bytes. 
if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + bool IsFast = false; Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); - Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); - if (NewAlign < DAG.getDataLayout().getABITypeAlign(NewVTTy)) + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, + LD->getAddressSpace(), NewAlign, + LD->getMemOperand()->getFlags(), &IsFast) || + !IsFast) return SDValue(); SDValue NewPtr = @@ -16788,27 +16928,26 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (VTSize.isScalable()) return SDValue(); + bool FastLD = false, FastST = false; EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || - !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) - return SDValue(); - - Align LDAlign = LD->getAlign(); - Align STAlign = ST->getAlign(); - Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); - Align ABIAlign = DAG.getDataLayout().getABITypeAlign(IntVTTy); - if (LDAlign < ABIAlign || STAlign < ABIAlign) + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT) || + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, + *LD->getMemOperand(), &FastLD) || + !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), IntVT, + *ST->getMemOperand(), &FastST) || + !FastLD || !FastST) return SDValue(); SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), LDAlign); + LD->getPointerInfo(), LD->getAlign()); SDValue NewST = DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(), - ST->getPointerInfo(), STAlign); + ST->getPointerInfo(), ST->getAlign()); AddToWorklist(NewLD.getNode()); AddToWorklist(NewST.getNode()); @@ -16839,8 +16978,10 @@ bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, SDValue 
&ConstNode) { APInt Val; - // If the add only has one use, this would be OK to do. - if (AddNode.getNode()->hasOneUse()) + // If the add only has one use, and the target thinks the folding is + // profitable or does not lead to worse code, this would be OK to do. + if (AddNode.getNode()->hasOneUse() && + TLI.isMulAddWithConstProfitable(AddNode, ConstNode)) return true; // Walk all the users of the constant with which we're multiplying. @@ -16932,6 +17073,22 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1; + Optional<MachineMemOperand::Flags> Flags; + AAMDNodes AAInfo; + for (unsigned I = 0; I != NumStores; ++I) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); + if (!Flags) { + Flags = St->getMemOperand()->getFlags(); + AAInfo = St->getAAInfo(); + continue; + } + // Skip merging if there's an inconsistent flag. + if (Flags != St->getMemOperand()->getFlags()) + return false; + // Concatenate AA metadata. + AAInfo = AAInfo.concat(St->getAAInfo()); + } + EVT StoreTy; if (UseVector) { unsigned Elts = NumStores * NumMemElts; @@ -17049,9 +17206,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( // make sure we use trunc store if it's necessary to be legal. 
SDValue NewStore; if (!UseTrunc) { - NewStore = - DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), - FirstInChain->getPointerInfo(), FirstInChain->getAlign()); + NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + FirstInChain->getAlign(), Flags.getValue(), AAInfo); } else { // Must be realized as a trunc store EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); @@ -17063,7 +17220,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( NewStore = DAG.getTruncStore( NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(), FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/, - FirstInChain->getAlign(), FirstInChain->getMemOperand()->getFlags()); + FirstInChain->getAlign(), Flags.getValue(), AAInfo); } // Replace all merged stores with the new store. @@ -17360,7 +17517,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( SDValue StoredVal = ST->getValue(); bool IsElementZero = false; if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) - IsElementZero = C->isNullValue(); + IsElementZero = C->isZero(); else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) IsElementZero = C->getConstantFPValue()->isNullValue(); if (IsElementZero) { @@ -17379,7 +17536,8 @@ bool DAGCombiner::tryStoreMergeOfConstants( break; if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFast) && IsFast) { @@ -17391,7 +17549,8 @@ bool DAGCombiner::tryStoreMergeOfConstants( EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, + 
DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFast) && IsFast) { @@ -17410,7 +17569,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts); if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) && - TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) @@ -17486,7 +17645,8 @@ bool DAGCombiner::tryStoreMergeOfExtracts( if (Ty.getSizeInBits() > MaximumLegalStoreInBits) break; - if (TLI.isTypeLegal(Ty) && TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) && + if (TLI.isTypeLegal(Ty) && + TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) && IsFast) @@ -17634,8 +17794,13 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, bool IsFastSt = false; bool IsFastLd = false; - if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + // Don't try vector types if we need a rotate. We may still fail the + // legality checks for the integer type, but we can't handle the rotate + // case with vectors. + // FIXME: We could use a shuffle in place of the rotate. 
+ if (!NeedRotate && TLI.isTypeLegal(StoreTy) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -17649,7 +17814,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; StoreTy = EVT::getIntegerVT(Context, SizeInBits); if (TLI.isTypeLegal(StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, StoreTy, + DAG.getMachineFunction()) && TLI.allowsMemoryAccess(Context, DL, StoreTy, *FirstInChain->getMemOperand(), &IsFastSt) && IsFastSt && @@ -17663,7 +17829,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, TargetLowering::TypePromoteInteger) { EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy); if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) && - TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) && + TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, + DAG.getMachineFunction()) && TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy, StoreTy) && TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) && @@ -18215,7 +18382,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { case ISD::LIFETIME_END: // We can forward past any lifetime start/end that can be proven not to // alias the node. 
- if (!isAlias(Chain.getNode(), N)) + if (!mayAlias(Chain.getNode(), N)) Chains.push_back(Chain.getOperand(0)); break; case ISD::STORE: { @@ -18593,32 +18760,35 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, if (!VecEltVT.isByteSized()) return SDValue(); - Align Alignment = OriginalLoad->getAlign(); - Align NewAlign = DAG.getDataLayout().getABITypeAlign( - VecEltVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Alignment || - !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) - return SDValue(); - - ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ? - ISD::NON_EXTLOAD : ISD::EXTLOAD; - if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) + ISD::LoadExtType ExtTy = + ResultVT.bitsGT(VecEltVT) ? ISD::NON_EXTLOAD : ISD::EXTLOAD; + if (!TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT) || + !TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT)) return SDValue(); - Alignment = NewAlign; - + Align Alignment = OriginalLoad->getAlign(); MachinePointerInfo MPI; SDLoc DL(EVE); if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { int Elt = ConstEltNo->getZExtValue(); unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + Alignment = commonAlignment(Alignment, PtrOff); } else { // Discard the pointer info except the address space because the memory // operand can't represent this new access since the offset is variable. 
MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace()); + Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8); } + + bool IsFast = false; + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, + OriginalLoad->getAddressSpace(), Alignment, + OriginalLoad->getMemOperand()->getFlags(), + &IsFast) || + !IsFast) + return SDValue(); + SDValue NewPtr = TLI.getVectorElementPointer(DAG, OriginalLoad->getBasePtr(), InVecVT, EltNo); @@ -18864,7 +19034,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { Use->getOperand(0) == VecOp && isa<ConstantSDNode>(Use->getOperand(1)); })) { - APInt DemandedElts = APInt::getNullValue(NumElts); + APInt DemandedElts = APInt::getZero(NumElts); for (SDNode *Use : VecOp->uses()) { auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1)); if (CstElt->getAPIntValue().ult(NumElts)) @@ -18877,7 +19047,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { AddToWorklist(N); return SDValue(N, 0); } - APInt DemandedBits = APInt::getAllOnesValue(VecEltBitWidth); + APInt DemandedBits = APInt::getAllOnes(VecEltBitWidth); if (SimplifyDemandedBits(VecOp, DemandedBits, DemandedElts, true)) { // We simplified the vector operand of this extract element. If this // extract is not dead, visit it again so it is folded properly. @@ -19672,8 +19842,10 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { // Make sure the first element matches // (zext (extract_vector_elt X, C)) + // Offset must be a constant multiple of the + // known-minimum vector length of the result type. int64_t Offset = checkElem(Op0); - if (Offset < 0) + if (Offset < 0 || (Offset % VT.getVectorNumElements()) != 0) return SDValue(); unsigned NumElems = N->getNumOperands(); @@ -19844,6 +20016,44 @@ static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops)); } +// Attempt to merge nested concat_vectors/undefs. 
+// Fold concat_vectors(concat_vectors(x,y,z,w),u,u,concat_vectors(a,b,c,d)) +// --> concat_vectors(x,y,z,w,u,u,u,u,u,u,u,u,a,b,c,d) +static SDValue combineConcatVectorOfConcatVectors(SDNode *N, + SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + + // Ensure we're concatenating UNDEF and CONCAT_VECTORS nodes of similar types. + EVT SubVT; + SDValue FirstConcat; + for (const SDValue &Op : N->ops()) { + if (Op.isUndef()) + continue; + if (Op.getOpcode() != ISD::CONCAT_VECTORS) + return SDValue(); + if (!FirstConcat) { + SubVT = Op.getOperand(0).getValueType(); + if (!DAG.getTargetLoweringInfo().isTypeLegal(SubVT)) + return SDValue(); + FirstConcat = Op; + continue; + } + if (SubVT != Op.getOperand(0).getValueType()) + return SDValue(); + } + assert(FirstConcat && "Concat of all-undefs found"); + + SmallVector<SDValue> ConcatOps; + for (const SDValue &Op : N->ops()) { + if (Op.isUndef()) { + ConcatOps.append(FirstConcat->getNumOperands(), DAG.getUNDEF(SubVT)); + continue; + } + ConcatOps.append(Op->op_begin(), Op->op_end()); + } + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, ConcatOps); +} + // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at // most two distinct vectors the same size as the result, attempt to turn this @@ -20103,13 +20313,19 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + // FIXME: Add support for concat_vectors(bitcast(vec0),bitcast(vec1),...). if (SDValue V = combineConcatVectorOfScalars(N, DAG)) return V; - // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. - if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + // Fold CONCAT_VECTORS of CONCAT_VECTORS (or undef) to VECTOR_SHUFFLE. 
+ if (SDValue V = combineConcatVectorOfConcatVectors(N, DAG)) + return V; + + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) return V; + } if (SDValue V = combineConcatVectorOfCasts(N, DAG)) return V; @@ -20351,9 +20567,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { return SDValue(); auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0)); - auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1)); - if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || - !ExtIdx) + if (!Ld || Ld->getExtensionType() || !Ld->isSimple()) return SDValue(); // Allow targets to opt-out. @@ -20363,7 +20577,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { if (!VT.isByteSized()) return SDValue(); - unsigned Index = ExtIdx->getZExtValue(); + unsigned Index = Extract->getConstantOperandVal(1); unsigned NumElts = VT.getVectorMinNumElements(); // The definition of EXTRACT_SUBVECTOR states that the index must be a @@ -20492,7 +20706,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // If the concatenated source types match this extract, it's a direct // simplification: // extract_subvec (concat V1, V2, ...), i --> Vi - if (ConcatSrcNumElts == ExtNumElts) + if (NVT.getVectorElementCount() == ConcatSrcVT.getVectorElementCount()) return V.getOperand(ConcatOpIdx); // If the concatenated source vectors are a multiple length of this extract, @@ -20500,7 +20714,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // concat operand. 
Example: // v2i8 extract_subvec (v16i8 concat (v8i8 X), (v8i8 Y), 14 --> // v2i8 extract_subvec v8i8 Y, 6 - if (NVT.isFixedLengthVector() && ConcatSrcNumElts % ExtNumElts == 0) { + if (NVT.isFixedLengthVector() && ConcatSrcVT.isFixedLengthVector() && + ConcatSrcNumElts % ExtNumElts == 0) { SDLoc DL(N); unsigned NewExtIdx = ExtIdx - ConcatOpIdx * ConcatSrcNumElts; assert(NewExtIdx + ExtNumElts <= ConcatSrcNumElts && @@ -20562,8 +20777,12 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) { // otherwise => (extract_subvec V1, ExtIdx) uint64_t InsIdx = V.getConstantOperandVal(2); if (InsIdx * SmallVT.getScalarSizeInBits() == - ExtIdx * NVT.getScalarSizeInBits()) + ExtIdx * NVT.getScalarSizeInBits()) { + if (LegalOperations && !TLI.isOperationLegal(ISD::BITCAST, NVT)) + return SDValue(); + return DAG.getBitcast(NVT, V.getOperand(1)); + } return DAG.getNode( ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)), @@ -21131,15 +21350,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); // Canonicalize shuffle v, v -> v, undef - if (N0 == N1) { - SmallVector<int, 8> NewMask; - for (unsigned i = 0; i != NumElts; ++i) { - int Idx = SVN->getMaskElt(i); - if (Idx >= (int)NumElts) Idx -= NumElts; - NewMask.push_back(Idx); - } - return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask); - } + if (N0 == N1) + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), + createUnaryMask(SVN->getMask(), NumElts)); // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. if (N0.isUndef()) @@ -21290,6 +21503,70 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } } + // See if we can replace a shuffle with an insert_subvector. + // e.g. v2i32 into v8i32: + // shuffle(lhs,concat(rhs0,rhs1,rhs2,rhs3),0,1,2,3,10,11,6,7). + // --> insert_subvector(lhs,rhs1,4). 
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT) && + TLI.isOperationLegalOrCustom(ISD::INSERT_SUBVECTOR, VT)) { + auto ShuffleToInsert = [&](SDValue LHS, SDValue RHS, ArrayRef<int> Mask) { + // Ensure RHS subvectors are legal. + assert(RHS.getOpcode() == ISD::CONCAT_VECTORS && "Can't find subvectors"); + EVT SubVT = RHS.getOperand(0).getValueType(); + int NumSubVecs = RHS.getNumOperands(); + int NumSubElts = SubVT.getVectorNumElements(); + assert((NumElts % NumSubElts) == 0 && "Subvector mismatch"); + if (!TLI.isTypeLegal(SubVT)) + return SDValue(); + + // Don't bother if we have an unary shuffle (matches undef + LHS elts). + if (all_of(Mask, [NumElts](int M) { return M < (int)NumElts; })) + return SDValue(); + + // Search [NumSubElts] spans for RHS sequence. + // TODO: Can we avoid nested loops to increase performance? + SmallVector<int> InsertionMask(NumElts); + for (int SubVec = 0; SubVec != NumSubVecs; ++SubVec) { + for (int SubIdx = 0; SubIdx != (int)NumElts; SubIdx += NumSubElts) { + // Reset mask to identity. + std::iota(InsertionMask.begin(), InsertionMask.end(), 0); + + // Add subvector insertion. + std::iota(InsertionMask.begin() + SubIdx, + InsertionMask.begin() + SubIdx + NumSubElts, + NumElts + (SubVec * NumSubElts)); + + // See if the shuffle mask matches the reference insertion mask. 
+ bool MatchingShuffle = true; + for (int i = 0; i != (int)NumElts; ++i) { + int ExpectIdx = InsertionMask[i]; + int ActualIdx = Mask[i]; + if (0 <= ActualIdx && ExpectIdx != ActualIdx) { + MatchingShuffle = false; + break; + } + } + + if (MatchingShuffle) + return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, LHS, + RHS.getOperand(SubVec), + DAG.getVectorIdxConstant(SubIdx, SDLoc(N))); + } + } + return SDValue(); + }; + ArrayRef<int> Mask = SVN->getMask(); + if (N1.getOpcode() == ISD::CONCAT_VECTORS) + if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask)) + return InsertN1; + if (N0.getOpcode() == ISD::CONCAT_VECTORS) { + SmallVector<int> CommuteMask(Mask.begin(), Mask.end()); + ShuffleVectorSDNode::commuteMask(CommuteMask); + if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask)) + return InsertN0; + } + } + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) @@ -21859,6 +22136,40 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVPOp(SDNode *N) { + // VP operations in which all vector elements are disabled - either by + // determining that the mask is all false or that the EVL is 0 - can be + // eliminated. + bool AreAllEltsDisabled = false; + if (auto EVLIdx = ISD::getVPExplicitVectorLengthIdx(N->getOpcode())) + AreAllEltsDisabled |= isNullConstant(N->getOperand(*EVLIdx)); + if (auto MaskIdx = ISD::getVPMaskIdx(N->getOpcode())) + AreAllEltsDisabled |= + ISD::isConstantSplatVectorAllZeros(N->getOperand(*MaskIdx).getNode()); + + // This is the only generic VP combine we support for now. + if (!AreAllEltsDisabled) + return SDValue(); + + // Binary operations can be replaced by UNDEF. + if (ISD::isVPBinaryOp(N->getOpcode())) + return DAG.getUNDEF(N->getValueType(0)); + + // VP Memory operations can be replaced by either the chain (stores) or the + // chain + undef (loads). 
+ if (const auto *MemSD = dyn_cast<MemSDNode>(N)) { + if (MemSD->writeMem()) + return MemSD->getChain(); + return CombineTo(N, DAG.getUNDEF(N->getValueType(0)), MemSD->getChain()); + } + + // Reduction operations return the start operand when no elements are active. + if (ISD::isVPReduction(N->getOpcode())) + return N->getOperand(0); + + return SDValue(); +} + /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle /// with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -21915,7 +22226,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { else Bits = Bits.extractBits(NumSubBits, SubIdx * NumSubBits); - if (Bits.isAllOnesValue()) + if (Bits.isAllOnes()) Indices.push_back(i); else if (Bits == 0) Indices.push_back(i + NumSubElts); @@ -21950,7 +22261,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { /// If a vector binop is performed on splat values, it may be profitable to /// extract, scalarize, and insert/splat. -static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { +static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, + const SDLoc &DL) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); unsigned Opcode = N->getOpcode(); @@ -21971,7 +22283,6 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { !TLI.isOperationLegalOrCustom(Opcode, EltVT)) return SDValue(); - SDLoc DL(N); SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src0, IndexC); SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Src1, IndexC); @@ -21995,20 +22306,19 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) { } /// Visit a binary vector operation, like ADD. 
-SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { - assert(N->getValueType(0).isVector() && - "SimplifyVBinOp only works on vectors!"); +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { + EVT VT = N->getValueType(0); + assert(VT.isVector() && "SimplifyVBinOp only works on vectors!"); SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); SDValue Ops[] = {LHS, RHS}; - EVT VT = N->getValueType(0); unsigned Opcode = N->getOpcode(); SDNodeFlags Flags = N->getFlags(); // See if we can constant fold the vector operation. - if (SDValue Fold = DAG.FoldConstantVectorArithmetic( - Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + if (SDValue Fold = DAG.FoldConstantArithmetic(Opcode, SDLoc(LHS), + LHS.getValueType(), Ops)) return Fold; // Move unary shuffles with identical masks after a vector binop: @@ -22026,7 +22336,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) && LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() && (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) { - SDLoc DL(N); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0), RHS.getOperand(0), Flags); SDValue UndefV = LHS.getOperand(1); @@ -22043,7 +22352,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat X), (splat C) --> splat (binop X, C) - SDLoc DL(N); SDValue X = Shuf0->getOperand(0); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, X, RHS, Flags); return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22053,7 +22361,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat C), (splat X) --> splat (binop C, X) - SDLoc DL(N); SDValue X = Shuf1->getOperand(0); SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, 
LHS, X, Flags); return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), @@ -22077,7 +22384,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT, LegalOperations)) { // (binop undef, undef) may not return undef, so compute that result. - SDLoc DL(N); SDValue VecC = DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT)); SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y); @@ -22104,7 +22410,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { EVT NarrowVT = LHS.getOperand(0).getValueType(); if (NarrowVT == RHS.getOperand(0).getValueType() && TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) { - SDLoc DL(N); unsigned NumOperands = LHS.getNumOperands(); SmallVector<SDValue, 4> ConcatOps; for (unsigned i = 0; i != NumOperands; ++i) { @@ -22117,7 +22422,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } } - if (SDValue V = scalarizeBinOpOfSplats(N, DAG)) + if (SDValue V = scalarizeBinOpOfSplats(N, DAG, DL)) return V; return SDValue(); @@ -22431,15 +22736,23 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { if (!TLI.isBinOp(BinOpc) || (N2.getOpcode() != BinOpc)) return SDValue(); - if (!N->isOnlyUserOf(N0.getNode()) || !N->isOnlyUserOf(N1.getNode())) + // The use checks are intentionally on SDNode because we may be dealing + // with opcodes that produce more than one SDValue. + // TODO: Do we really need to check N0 (the condition operand of the select)? + // But removing that clause could cause an infinite loop... + if (!N0->hasOneUse() || !N1->hasOneUse() || !N2->hasOneUse()) return SDValue(); + // Binops may include opcodes that return multiple values, so all values + // must be created/propagated from the newly created binops below. 
+ SDVTList OpVTs = N1->getVTList(); + // Fold select(cond, binop(x, y), binop(z, y)) // --> binop(select(cond, x, z), y) if (N1.getOperand(1) == N2.getOperand(1)) { SDValue NewSel = DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0)); - SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, NewSel, N1.getOperand(1)); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, NewSel, N1.getOperand(1)); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); return NewBinOp; @@ -22453,7 +22766,7 @@ SDValue DAGCombiner::foldSelectOfBinops(SDNode *N) { VT == N2.getOperand(1).getValueType()) { SDValue NewSel = DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1)); - SDValue NewBinOp = DAG.getNode(BinOpc, DL, VT, N1.getOperand(0), NewSel); + SDValue NewBinOp = DAG.getNode(BinOpc, DL, OpVTs, N1.getOperand(0), NewSel); NewBinOp->setFlags(N1->getFlags()); NewBinOp->intersectFlagsWith(N2->getFlags()); return NewBinOp; @@ -22581,7 +22894,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) { // fold select_cc true, x, y -> x // fold select_cc false, x, y -> y - return !(SCCC->isNullValue()) ? N2 : N3; + return !(SCCC->isZero()) ? N2 : N3; } } @@ -22680,7 +22993,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X) // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X) // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X) - if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { + if (N1C && N1C->isZero() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { SDValue ValueOnZero = N2; SDValue Count = N3; // If the condition is NE instead of E, swap the operands. 
@@ -22707,6 +23020,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, } } + // Fold select_cc setgt X, -1, C, ~C -> xor (ashr X, BW-1), C + // Fold select_cc setlt X, 0, C, ~C -> xor (ashr X, BW-1), ~C + if (!NotExtCompare && N1C && N2C && N3C && + N2C->getAPIntValue() == ~N3C->getAPIntValue() && + ((N1C->isAllOnes() && CC == ISD::SETGT) || + (N1C->isZero() && CC == ISD::SETLT)) && + !TLI.shouldAvoidTransformToShift(VT, CmpOpVT.getScalarSizeInBits() - 1)) { + SDValue ASR = DAG.getNode( + ISD::SRA, DL, CmpOpVT, N0, + DAG.getConstant(CmpOpVT.getScalarSizeInBits() - 1, DL, CmpOpVT)); + return DAG.getNode(ISD::XOR, DL, VT, DAG.getSExtOrTrunc(ASR, DL, VT), + DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT)); + } + return SDValue(); } @@ -22747,7 +23074,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { return SDValue(); // Avoid division by zero. - if (C->isNullValue()) + if (C->isZero()) return SDValue(); SmallVector<SDNode *, 8> Built; @@ -22792,7 +23119,7 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) { /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) /// For the reciprocal, we need to find the zero of the function: -/// F(X) = A X - 1 [which has a zero at X = 1/A] +/// F(X) = 1/X - A [which has a zero at X = 1/A] /// => /// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form /// does not require additional intermediate precision] @@ -22803,9 +23130,10 @@ SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op, if (LegalDAG) return SDValue(); - // TODO: Handle half and/or extended types? + // TODO: Handle extended types? EVT VT = Op.getValueType(); - if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && + VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. 
@@ -22942,9 +23270,10 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, if (LegalDAG) return SDValue(); - // TODO: Handle half and/or extended types? + // TODO: Handle extended types? EVT VT = Op.getValueType(); - if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) + if (VT.getScalarType() != MVT::f16 && VT.getScalarType() != MVT::f32 && + VT.getScalarType() != MVT::f64) return SDValue(); // If estimates are explicitly disabled for this function, we're done. @@ -22994,7 +23323,7 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) { } /// Return true if there is any possibility that the two addresses overlap. -bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const { +bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { struct MemUseCharacteristics { bool IsVolatile; @@ -23154,7 +23483,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, // TODO: Relax aliasing for unordered atomics (see D66309) bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && cast<LSBaseSDNode>(C.getNode())->isSimple(); - if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) { + if ((IsLoad && IsOpLoad) || !mayAlias(N, C.getNode())) { // Look further up the chain. C = C.getOperand(0); return true; @@ -23172,7 +23501,7 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, case ISD::LIFETIME_END: { // We can forward past any lifetime start/end that can be proven not to // alias the memory access. - if (!isAlias(N, C.getNode())) { + if (!mayAlias(N, C.getNode())) { // Look further up the chain. 
C = C.getOperand(0); return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index 4ca731cfdf62..4d1449bc2751 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -75,6 +75,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalValue.h" @@ -195,10 +196,8 @@ void FastISel::flushLocalValueMap() { EmitStartPt ? MachineBasicBlock::reverse_iterator(EmitStartPt) : FuncInfo.MBB->rend(); MachineBasicBlock::reverse_iterator RI(LastLocalValue); - for (; RI != RE;) { - MachineInstr &LocalMI = *RI; - // Increment before erasing what it points to. - ++RI; + for (MachineInstr &LocalMI : + llvm::make_early_inc_range(llvm::make_range(RI, RE))) { Register DefReg = findLocalRegDef(LocalMI); if (!DefReg) continue; @@ -622,7 +621,7 @@ bool FastISel::selectGetElementPtr(const User *I) { bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, const CallInst *CI, unsigned StartIdx) { - for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { + for (unsigned i = StartIdx, e = CI->arg_size(); i != e; ++i) { Value *Val = CI->getArgOperand(i); // Check for constants and encode them with a StackMaps::ConstantOp prefix. if (const auto *C = dyn_cast<ConstantInt>(Val)) { @@ -784,7 +783,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> // This includes all meta-operands up to but not including CC. unsigned NumMetaOpers = PatchPointOpers::CCPos; - assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs && + assert(I->arg_size() >= NumMetaOpers + NumArgs && "Not enough arguments provided to the patchpoint intrinsic"); // For AnyRegCC the arguments are lowered later on manually. 
@@ -1151,6 +1150,8 @@ bool FastISel::lowerCall(const CallInst *CI) { CLI.setCallee(RetTy, FuncTy, CI->getCalledOperand(), std::move(Args), *CI) .setTailCall(IsTailCall); + diagnoseDontCall(*CI); + return lowerCallTo(CLI); } @@ -1264,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // If using instruction referencing, mutate this into a DBG_INSTR_REF, // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. - if (TM.Options.ValueTrackingVariableLocations && Op->isReg()) { + if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) { Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); Builder->getOperand(1).ChangeToImmediate(0); auto *NewExpr = @@ -1292,18 +1293,22 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { + // See if there's an expression to constant-fold. + DIExpression *Expr = DI->getExpression(); + if (Expr) + std::tie(Expr, CI) = Expr->constantFold(CI); if (CI->getBitWidth() > 64) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addCImm(CI) .addImm(0U) .addMetadata(DI->getVariable()) - .addMetadata(DI->getExpression()); + .addMetadata(Expr); else BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addImm(CI->getZExtValue()) .addImm(0U) .addMetadata(DI->getVariable()) - .addMetadata(DI->getExpression()); + .addMetadata(Expr); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) .addFPImm(CF) @@ -1319,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // If using instruction referencing, mutate this into a DBG_INSTR_REF, // to be later patched up by finalizeDebugInstrRefs. 
- if (TM.Options.ValueTrackingVariableLocations) { + if (FuncInfo.MF->useDebugInstrRef()) { Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); Builder->getOperand(1).ChangeToImmediate(0); } @@ -2303,8 +2308,7 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const { bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable); const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); - AAMDNodes AAInfo; - I->getAAMetadata(AAInfo); + AAMDNodes AAInfo = I->getAAMetadata(); if (!Alignment) // Ensure that codegen never sees alignment 0. Alignment = DL.getABITypeAlign(ValTy); diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 348fad6daf8f..c1bb65409282 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -722,7 +722,7 @@ void InstrEmitter::AddDbgValueLocationOps( MIB.addFrameIndex(Op.getFrameIx()); break; case SDDbgOperand::VREG: - MIB.addReg(Op.getVReg(), RegState::Debug); + MIB.addReg(Op.getVReg()); break; case SDDbgOperand::SDNODE: { SDValue V = SDValue(Op.getSDNode(), Op.getResNo()); @@ -862,7 +862,7 @@ MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { DebugLoc DL = SD->getDebugLoc(); auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)); MIB.addReg(0U); - MIB.addReg(0U, RegState::Debug); + MIB.addReg(0U); MIB.addMetadata(Var); MIB.addMetadata(Expr); return &*MIB; @@ -872,22 +872,33 @@ MachineInstr * InstrEmitter::EmitDbgValueFromSingleOp(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); assert(SD->getLocationOps().size() == 1 && "Non variadic dbg_value should have only one location op"); + // See about constant-folding the expression. 
+ // Copy the location operand in case we replace it. + SmallVector<SDDbgOperand, 1> LocationOps(1, SD->getLocationOps()[0]); + if (Expr && LocationOps[0].getKind() == SDDbgOperand::CONST) { + const Value *V = LocationOps[0].getConst(); + if (auto *C = dyn_cast<ConstantInt>(V)) { + std::tie(Expr, C) = Expr->constantFold(C); + LocationOps[0] = SDDbgOperand::fromConst(C); + } + } + // Emit non-variadic dbg_value nodes as DBG_VALUE. // DBG_VALUE := "DBG_VALUE" loc, isIndirect, var, expr auto MIB = BuildMI(*MF, DL, II); - AddDbgValueLocationOps(MIB, II, SD->getLocationOps(), VRBaseMap); + AddDbgValueLocationOps(MIB, II, LocationOps, VRBaseMap); if (SD->isIndirect()) MIB.addImm(0U); else - MIB.addReg(0U, RegState::Debug); + MIB.addReg(0U); return MIB.addMetadata(Var).addMetadata(Expr); } @@ -1329,5 +1340,5 @@ InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, TRI(MF->getSubtarget().getRegisterInfo()), TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), InsertPos(insertpos) { - EmitDebugInstrRefs = TM.Options.ValueTrackingVariableLocations; + EmitDebugInstrRefs = MF->useDebugInstrRef(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index d92b23f56e4d..eb9d2286aeb4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -1164,6 +1164,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Action = TLI.getOperationAction(Node->getOpcode(), cast<MaskedStoreSDNode>(Node)->getValue().getValueType()); break; + case ISD::VP_SCATTER: + Action = TLI.getOperationAction( + Node->getOpcode(), + cast<VPScatterSDNode>(Node)->getValue().getValueType()); + break; + case ISD::VP_STORE: + Action = TLI.getOperationAction( + Node->getOpcode(), + cast<VPStoreSDNode>(Node)->getValue().getValueType()); + break; case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: case ISD::VECREDUCE_ADD: @@ -1181,6 +1191,22 @@ void 
SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOpcode(), Node->getOperand(0).getValueType()); break; case ISD::VECREDUCE_SEQ_FADD: + case ISD::VECREDUCE_SEQ_FMUL: + case ISD::VP_REDUCE_FADD: + case ISD::VP_REDUCE_FMUL: + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + case ISD::VP_REDUCE_FMAX: + case ISD::VP_REDUCE_FMIN: + case ISD::VP_REDUCE_SEQ_FADD: + case ISD::VP_REDUCE_SEQ_FMUL: Action = TLI.getOperationAction( Node->getOpcode(), Node->getOperand(1).getValueType()); break; @@ -1333,9 +1359,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { Visited.insert(Op.getNode()); Worklist.push_back(Idx.getNode()); SDValue StackPtr, Ch; - for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), - UE = Vec.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + for (SDNode *User : Vec.getNode()->uses()) { if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { if (ST->isIndexed() || ST->isTruncatingStore() || ST->getValue() != Vec) @@ -2197,9 +2221,7 @@ static bool useSinCos(SDNode *Node) { ? ISD::FCOS : ISD::FSIN; SDValue Op0 = Node->getOperand(0); - for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), - UE = Op0.getNode()->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; + for (const SDNode *User : Op0.getNode()->uses()) { if (User == Node) continue; // The other user might have been turned into sincos already. @@ -2636,7 +2658,7 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { // If CTPOP is legal, use it. Otherwise use shifts and xor. 
SDValue Result; - if (TLI.isOperationLegal(ISD::CTPOP, VT)) { + if (TLI.isOperationLegalOrPromote(ISD::CTPOP, VT)) { Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); } else { Result = Op; @@ -2658,21 +2680,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { bool NeedInvert; switch (Node->getOpcode()) { case ISD::ABS: - if (TLI.expandABS(Node, Tmp1, DAG)) + if ((Tmp1 = TLI.expandABS(Node, DAG))) Results.push_back(Tmp1); break; case ISD::CTPOP: - if (TLI.expandCTPOP(Node, Tmp1, DAG)) + if ((Tmp1 = TLI.expandCTPOP(Node, DAG))) Results.push_back(Tmp1); break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - if (TLI.expandCTLZ(Node, Tmp1, DAG)) + if ((Tmp1 = TLI.expandCTLZ(Node, DAG))) Results.push_back(Tmp1); break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - if (TLI.expandCTTZ(Node, Tmp1, DAG)) + if ((Tmp1 = TLI.expandCTTZ(Node, DAG))) Results.push_back(Tmp1); break; case ISD::BITREVERSE: @@ -3229,9 +3251,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && TLI.isOperationLegalOrCustom(ISD::XOR, VT) && "Don't know how to expand this subtraction!"); - Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), - DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, - VT)); + Tmp1 = DAG.getNOT(dl, Node->getOperand(1), VT); Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); break; @@ -4242,8 +4262,7 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { SDValue Op = Node->getOperand(IsStrict ? 1 : 0); SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue(); EVT VT = Node->getValueType(0); - assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 2 : 1)) - ->isNullValue() && + assert(cast<ConstantSDNode>(Node->getOperand(IsStrict ? 
2 : 1))->isZero() && "Unable to expand as libcall if it is not normal rounding"); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), VT); @@ -4737,6 +4756,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: + case ISD::STRICT_FROUND: case ISD::STRICT_FSIN: case ISD::STRICT_FCOS: case ISD::STRICT_FLOG: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3553f9ec16c2..27f9cede1922 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -61,6 +61,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); + case ISD::ARITH_FENCE: R = SoftenFloatRes_ARITH_FENCE(N); break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; @@ -206,6 +207,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREEZE(SDNode *N) { GetSoftenedFloat(N->getOperand(0))); } +SDValue DAGTypeLegalizer::SoftenFloatRes_ARITH_FENCE(SDNode *N) { + EVT Ty = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue NewFence = DAG.getNode(ISD::ARITH_FENCE, SDLoc(N), Ty, + GetSoftenedFloat(N->getOperand(0))); + return NewFence; +} + SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); @@ -257,7 +265,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { unsigned Size = NVT.getSizeInBits(); // Mask = ~(1 << (Size-1)) - APInt API = APInt::getAllOnesValue(Size); + APInt API = APInt::getAllOnes(Size); API.clearBit(Size - 1); SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); @@ -820,6 +828,7 @@ bool 
DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::STRICT_FP_TO_FP16: case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; @@ -871,13 +880,17 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { // We actually deal with the partially-softened FP_TO_FP16 node too, which // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_TO_FP16 || N->getOpcode() == ISD::STRICT_FP_ROUND); bool IsStrict = N->isStrictFPOpcode(); SDValue Op = N->getOperand(IsStrict ? 1 : 0); EVT SVT = Op.getValueType(); EVT RVT = N->getValueType(0); - EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; + EVT FloatRVT = (N->getOpcode() == ISD::FP_TO_FP16 || + N->getOpcode() == ISD::STRICT_FP_TO_FP16) + ? 
MVT::f16 + : RVT; RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index b8a3dd014901..1fa4d88fcb4a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -23,6 +23,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/raw_ostream.h" +#include <algorithm> using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -81,15 +82,23 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::STRICT_FSETCCS: case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; case ISD::SMIN: - case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; + case ISD::SMAX: + Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false); + break; case ISD::UMIN: case ISD::UMAX: Res = PromoteIntRes_UMINUMAX(N); break; - case ISD::SHL: Res = PromoteIntRes_SHL(N); break; + case ISD::SHL: + Res = PromoteIntRes_SHL(N, /*IsVP*/ false); + break; case ISD::SIGN_EXTEND_INREG: Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; - case ISD::SRA: Res = PromoteIntRes_SRA(N); break; - case ISD::SRL: Res = PromoteIntRes_SRL(N); break; + case ISD::SRA: + Res = PromoteIntRes_SRA(N, /*IsVP*/ false); + break; + case ISD::SRL: + Res = PromoteIntRes_SRL(N, /*IsVP*/ false); + break; case ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; @@ -144,13 +153,19 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::XOR: case ISD::ADD: case ISD::SUB: - case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::MUL: + Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ false); + break; case ISD::SDIV: - case ISD::SREM: Res = 
PromoteIntRes_SExtIntBinOp(N); break; + case ISD::SREM: + Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ false); + break; case ISD::UDIV: - case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; + case ISD::UREM: + Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ false); + break; case ISD::SADDO: case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; @@ -220,6 +235,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_VECREDUCE(N); break; + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + Res = PromoteIntRes_VP_REDUCE(N); + break; + case ISD::FREEZE: Res = PromoteIntRes_FREEZE(N); break; @@ -233,6 +260,32 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FSHR: Res = PromoteIntRes_FunnelShift(N); break; + + case ISD::VP_AND: + case ISD::VP_OR: + case ISD::VP_XOR: + case ISD::VP_ADD: + case ISD::VP_SUB: + case ISD::VP_MUL: + Res = PromoteIntRes_SimpleIntBinOp(N, /*IsVP*/ true); + break; + case ISD::VP_SDIV: + case ISD::VP_SREM: + Res = PromoteIntRes_SExtIntBinOp(N, /*IsVP*/ true); + break; + case ISD::VP_UDIV: + case ISD::VP_UREM: + Res = PromoteIntRes_ZExtIntBinOp(N, /*IsVP*/ true); + break; + case ISD::VP_SHL: + Res = PromoteIntRes_SHL(N, /*IsVP*/ true); + break; + case ISD::VP_ASHR: + Res = PromoteIntRes_SRA(N, /*IsVP*/ true); + break; + case ISD::VP_LSHR: + Res = PromoteIntRes_SRL(N, /*IsVP*/ true); + break; } // If the result is null then the sub-method took care of registering it. @@ -438,19 +491,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { CreateStackStoreLoad(InOp, OutVT)); } -// Helper for BSWAP/BITREVERSE promotion to ensure we can fit any shift amount -// in the VT returned by getShiftAmountTy and to return a safe VT if we can't. 
-static EVT getShiftAmountTyForConstant(EVT VT, const TargetLowering &TLI, - SelectionDAG &DAG) { - EVT ShiftVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - // If any possible shift value won't fit in the prefered type, just use - // something safe. It will be legalized when the shift is expanded. - if (!ShiftVT.isVector() && - ShiftVT.getSizeInBits() < Log2_32_Ceil(VT.getSizeInBits())) - ShiftVT = MVT::i32; - return ShiftVT; -} - SDValue DAGTypeLegalizer::PromoteIntRes_FREEZE(SDNode *N) { SDValue V = GetPromotedInteger(N->getOperand(0)); return DAG.getNode(ISD::FREEZE, SDLoc(N), @@ -474,7 +514,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); + EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); } @@ -496,7 +536,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { } unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); - EVT ShiftVT = getShiftAmountTyForConstant(NVT, TLI, DAG); + EVT ShiftVT = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), DAG.getConstant(DiffBits, dl, ShiftVT)); @@ -526,11 +566,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDLoc dl(N); + + // If the larger CTLZ isn't supported by the target, try to expand now. + // If we expand later we'll end up with more operations since we lost the + // original type. 
+ if (!OVT.isVector() && TLI.isTypeLegal(NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ, NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTLZ_ZERO_UNDEF, NVT)) { + if (SDValue Result = TLI.expandCTLZ(N, DAG)) { + Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result); + return Result; + } + } + // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - SDLoc dl(N); - EVT OVT = N->getValueType(0); - EVT NVT = Op.getValueType(); Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Subtract off the extra leading bits in the bigger type. return DAG.getNode( @@ -540,6 +593,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + + // If the larger CTPOP isn't supported by the target, try to expand now. + // If we expand later we'll end up with more operations since we lost the + // original type. + // TODO: Expand ISD::PARITY. Need to move ExpandPARITY from LegalizeDAG to + // TargetLowering. + if (N->getOpcode() == ISD::CTPOP && !OVT.isVector() && TLI.isTypeLegal(NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTPOP, NVT)) { + if (SDValue Result = TLI.expandCTPOP(N, DAG)) { + Result = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), NVT, Result); + return Result; + } + } + // Zero extend to the promoted type and do the count or parity there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op); @@ -550,6 +619,22 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); SDLoc dl(N); + + // If the larger CTTZ isn't supported by the target, try to expand now. + // If we expand later we'll end up with more operations since we lost the + // original type. 
Don't expand if we can use CTPOP or CTLZ expansion on the + // larger type. + if (!OVT.isVector() && TLI.isTypeLegal(NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ, NVT) && + !TLI.isOperationLegalOrCustomOrPromote(ISD::CTTZ_ZERO_UNDEF, NVT) && + !TLI.isOperationLegal(ISD::CTPOP, NVT) && + !TLI.isOperationLegal(ISD::CTLZ, NVT)) { + if (SDValue Result = TLI.expandCTTZ(N, DAG)) { + Result = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Result); + return Result; + } + } + if (N->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off @@ -702,11 +787,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue ExtPassThru = GetPromotedInteger(N->getPassThru()); + ISD::LoadExtType ExtType = N->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) + ExtType = ISD::EXTLOAD; + SDLoc dl(N); SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), N->getMask(), ExtPassThru, N->getMemoryVT(), N->getMemOperand(), - N->getAddressingMode(), ISD::EXTLOAD); + N->getAddressingMode(), ExtType, + N->isExpandingLoad()); // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); @@ -792,7 +882,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { unsigned NewBits = PromotedType.getScalarSizeInBits(); if (Opcode == ISD::UADDSAT) { - APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits); + APInt MaxVal = APInt::getAllOnes(OldBits).zext(NewBits); SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType); SDValue Add = DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted); @@ -806,7 +896,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSHLSAT(SDNode *N) { // Shift cannot use a min/max expansion, we can't detect overflow if all of // the bits have been shifted out. - if (IsShift || TLI.isOperationLegalOrCustom(Opcode, PromotedType)) { + if (IsShift || TLI.isOperationLegal(Opcode, PromotedType)) { unsigned ShiftOp; switch (Opcode) { case ISD::SADDSAT: @@ -1103,12 +1193,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { return DAG.getSExtOrTrunc(SetCC, dl, NVT); } -SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N, bool IsVP) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -1117,30 +1210,36 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { Op.getValueType(), Op, N->getOperand(1)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP) { // The input may have strange things in 
the top bits of the registers, but // these operations don't care. They may have weird bits going out, but // that too is okay if they are integer operations. SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = GetPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP) { // Sign extend the input. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP) { // Zero extend the input. 
SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), SDLoc(N), - LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { @@ -1152,22 +1251,28 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UMINUMAX(SDNode *N) { LHS.getValueType(), LHS, RHS); } -SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N, bool IsVP) { // The input value must be properly sign extended. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } -SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N, bool IsVP) { // The input value must be properly zero extended. 
SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) RHS = ZExtPromotedInteger(RHS); - return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); + if (!IsVP) + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS, + N->getOperand(2), N->getOperand(3)); } SDValue DAGTypeLegalizer::PromoteIntRes_Rotate(SDNode *N) { @@ -1383,7 +1488,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { if (N->getOpcode() == ISD::UMULO) { // Unsigned overflow occurred if the high part is non-zero. unsigned Shift = SmallVT.getScalarSizeInBits(); - EVT ShiftTy = getShiftAmountTyForConstant(Mul.getValueType(), TLI, DAG); + EVT ShiftTy = TLI.getShiftAmountTy(Mul.getValueType(), DAG.getDataLayout()); SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, DAG.getConstant(Shift, DL, ShiftTy)); Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, @@ -1523,6 +1628,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; + case ISD::INSERT_SUBVECTOR: Res = PromoteIntOp_INSERT_SUBVECTOR(N); break; case ISD::SHL: case ISD::SRA: @@ -1560,6 +1666,17 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::VECREDUCE_SMIN: case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break; + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + Res = PromoteIntOp_VP_REDUCE(N, 
OpNo); + break; case ISD::SET_ROUNDING: Res = PromoteIntOp_SET_ROUNDING(N); break; } @@ -1605,10 +1722,8 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, // If the width of OpL/OpR excluding the duplicated sign bits is no greater // than the width of NewLHS/NewRH, we can avoid inserting real truncate // instruction, which is redundant eventually. - unsigned OpLEffectiveBits = - OpL.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1; - unsigned OpREffectiveBits = - OpR.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1; + unsigned OpLEffectiveBits = DAG.ComputeMinSignedBits(OpL); + unsigned OpREffectiveBits = DAG.ComputeMinSignedBits(OpR); if (OpLEffectiveBits <= NewLHS.getScalarValueSizeInBits() && OpREffectiveBits <= NewRHS.getScalarValueSizeInBits()) { NewLHS = OpL; @@ -1832,29 +1947,25 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { - SDValue DataOp = N->getValue(); - EVT DataVT = DataOp.getValueType(); SDValue Mask = N->getMask(); - SDLoc dl(N); - bool TruncateStore = false; if (OpNo == 4) { + // The Mask. Update in place. + EVT DataVT = DataOp.getValueType(); Mask = PromoteTargetBoolean(Mask, DataVT); - // Update in place. 
SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); NewOps[4] = Mask; return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); - } else { // Data operand - assert(OpNo == 1 && "Unexpected operand for promotion"); - DataOp = GetPromotedInteger(DataOp); - TruncateStore = true; } - return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), + assert(OpNo == 1 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + + return DAG.getMaskedStore(N->getChain(), SDLoc(N), DataOp, N->getBasePtr(), N->getOffset(), Mask, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), - TruncateStore, N->isCompressingStore()); + /*IsTruncating*/ true, N->isCompressingStore()); } SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, @@ -2023,30 +2134,54 @@ SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) { return SDValue(); } -SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { - SDLoc dl(N); - SDValue Op; +static unsigned getExtendForIntVecReduction(SDNode *N) { switch (N->getOpcode()) { - default: llvm_unreachable("Expected integer vector reduction"); + default: + llvm_unreachable("Expected integer vector reduction"); case ISD::VECREDUCE_ADD: case ISD::VECREDUCE_MUL: case ISD::VECREDUCE_AND: case ISD::VECREDUCE_OR: case ISD::VECREDUCE_XOR: - Op = GetPromotedInteger(N->getOperand(0)); - break; + case ISD::VP_REDUCE_ADD: + case ISD::VP_REDUCE_MUL: + case ISD::VP_REDUCE_AND: + case ISD::VP_REDUCE_OR: + case ISD::VP_REDUCE_XOR: + return ISD::ANY_EXTEND; case ISD::VECREDUCE_SMAX: case ISD::VECREDUCE_SMIN: - Op = SExtPromotedInteger(N->getOperand(0)); - break; + case ISD::VP_REDUCE_SMAX: + case ISD::VP_REDUCE_SMIN: + return ISD::SIGN_EXTEND; case ISD::VECREDUCE_UMAX: case ISD::VECREDUCE_UMIN: - Op = ZExtPromotedInteger(N->getOperand(0)); - break; + case ISD::VP_REDUCE_UMAX: + case ISD::VP_REDUCE_UMIN: + return ISD::ZERO_EXTEND; } +} + +SDValue DAGTypeLegalizer::PromoteIntOpVectorReduction(SDNode *N, SDValue V) { + 
switch (getExtendForIntVecReduction(N)) { + default: + llvm_unreachable("Impossible extension kind for integer reduction"); + case ISD::ANY_EXTEND: + return GetPromotedInteger(V); + case ISD::SIGN_EXTEND: + return SExtPromotedInteger(V); + case ISD::ZERO_EXTEND: + return ZExtPromotedInteger(V); + } +} + +SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { + SDLoc dl(N); + SDValue Op = PromoteIntOpVectorReduction(N, N->getOperand(0)); EVT EltVT = Op.getValueType().getVectorElementType(); EVT VT = N->getValueType(0); + if (VT.bitsGE(EltVT)) return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op); @@ -2056,6 +2191,38 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce); } +SDValue DAGTypeLegalizer::PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo) { + SDLoc DL(N); + SDValue Op = N->getOperand(OpNo); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + + if (OpNo == 2) { // Mask + // Update in place. + NewOps[2] = PromoteTargetBoolean(Op, N->getOperand(1).getValueType()); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); + } + + assert(OpNo == 1 && "Unexpected operand for promotion"); + + Op = PromoteIntOpVectorReduction(N, Op); + + NewOps[OpNo] = Op; + + EVT VT = N->getValueType(0); + EVT EltVT = Op.getValueType().getScalarType(); + + if (VT.bitsGE(EltVT)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, NewOps); + + // Result size must be >= element/start-value size. If this is not the case + // after promotion, also promote both the start value and result type and + // then truncate. 
+ NewOps[0] = + DAG.getNode(getExtendForIntVecReduction(N), DL, EltVT, N->getOperand(0)); + SDValue Reduce = DAG.getNode(N->getOpcode(), DL, EltVT, NewOps); + return DAG.getNode(ISD::TRUNCATE, DL, VT, Reduce); +} + SDValue DAGTypeLegalizer::PromoteIntOp_SET_ROUNDING(SDNode *N) { SDValue Op = ZExtPromotedInteger(N->getOperand(1)); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0); @@ -2088,6 +2255,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { report_fatal_error("Do not know how to expand the result of this " "operator!"); + case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break; case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; @@ -2978,7 +3146,7 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { bool HasAddCarry = TLI.isOperationLegalOrCustom( ISD::ADDCARRY, TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); if (HasAddCarry) { - EVT ShiftAmtTy = getShiftAmountTyForConstant(NVT, TLI, DAG); + EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); SDValue Sign = DAG.getNode(ISD::SRA, dl, NVT, Hi, DAG.getConstant(NVT.getSizeInBits() - 1, dl, ShiftAmtTy)); @@ -3087,6 +3255,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); Op = GetSoftPromotedHalf(Op); Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); + Op = DAG.getNode(ISD::FP_TO_SINT, dl, VT, Op); + SplitInteger(Op, Lo, Hi); + return; } RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); @@ -3116,6 +3287,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); Op = GetSoftPromotedHalf(Op); Op = DAG.getNode(ISD::FP16_TO_FP, dl, NFPVT, Op); + Op = DAG.getNode(ISD::FP_TO_UINT, dl, VT, Op); + 
SplitInteger(Op, Lo, Hi); + return; } RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); @@ -3367,11 +3541,6 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue TL = DAG.getNode(ISD::AND, dl, NVT, T, Mask); EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout()); - if (APInt::getMaxValue(ShiftAmtTy.getSizeInBits()).ult(HalfBits)) { - // The type from TLI is too small to fit the shift amount we want. - // Override it with i32. The shift will have to be legalized. - ShiftAmtTy = MVT::i32; - } SDValue Shift = DAG.getConstant(HalfBits, dl, ShiftAmtTy); SDValue TH = DAG.getNode(ISD::SRL, dl, NVT, T, Shift); SDValue LLH = DAG.getNode(ISD::SRL, dl, NVT, LL, Shift); @@ -3464,8 +3633,11 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, SDValue SatMin = DAG.getConstant(MinVal, dl, VT); SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); SDValue Zero = DAG.getConstant(0, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + // Xor the inputs, if resulting sign bit is 0 the product will be + // positive, else negative. + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax); Result = DAG.getSelect(dl, VT, Overflow, Result, Product); } else { // For unsigned multiplication, we only need to check the max since we @@ -3638,7 +3810,7 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo, // Saturate to signed maximum. APInt MaxHi = APInt::getSignedMaxValue(NVTSize); - APInt MaxLo = APInt::getAllOnesValue(NVTSize); + APInt MaxLo = APInt::getAllOnes(NVTSize); Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi); Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo); // Saturate to signed minimum. 
@@ -3808,9 +3980,6 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // the new SHL_PARTS operation would need further legalization. SDValue ShiftOp = N->getOperand(1); EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); - assert(ShiftTy.getScalarSizeInBits() >= - Log2_32_Ceil(VT.getScalarSizeInBits()) && - "ShiftAmountTy is too small to cover the range of this type!"); if (ShiftOp.getValueType() != ShiftTy) ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); @@ -3857,7 +4026,10 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, } if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { - SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + EVT ShAmtTy = + EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize()); + SDValue ShAmt = DAG.getZExtOrTrunc(N->getOperand(1), dl, ShAmtTy); + SDValue Ops[2] = {N->getOperand(0), ShAmt}; TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(isSigned); SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); @@ -4035,7 +4207,25 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, LC = RTLIB::MULO_I64; else if (VT == MVT::i128) LC = RTLIB::MULO_I128; - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + + if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + // FIXME: This is not an optimal expansion, but better than crashing. 
+ EVT WideVT = + EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); + SDValue LHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(0)); + SDValue RHS = DAG.getNode(ISD::SIGN_EXTEND, dl, WideVT, N->getOperand(1)); + SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); + SDValue MulLo, MulHi; + SplitInteger(Mul, MulLo, MulHi); + SDValue SRA = + DAG.getNode(ISD::SRA, dl, VT, MulLo, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, VT)); + SDValue Overflow = + DAG.getSetCC(dl, N->getValueType(1), MulHi, SRA, ISD::SETNE); + SplitInteger(MulLo, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } SDValue Temp = DAG.CreateStackTemporary(PtrVT); // Temporary for the overflow value, default it to zero. @@ -4188,18 +4378,45 @@ void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_Rotate(SDNode *N, SDValue &Lo, SDValue &Hi) { - // Lower the rotate to shifts and ORs which can be expanded. - SDValue Res; - TLI.expandROT(N, true /*AllowVectorOps*/, Res, DAG); + // Delegate to funnel-shift expansion. + SDLoc DL(N); + unsigned Opcode = N->getOpcode() == ISD::ROTL ? ISD::FSHL : ISD::FSHR; + SDValue Res = DAG.getNode(Opcode, DL, N->getValueType(0), N->getOperand(0), + N->getOperand(0), N->getOperand(1)); SplitInteger(Res, Lo, Hi); } -void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, - SDValue &Lo, SDValue &Hi) { - // Lower the funnel shift to shifts and ORs which can be expanded. - SDValue Res; - TLI.expandFunnelShift(N, Res, DAG); - SplitInteger(Res, Lo, Hi); +void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Values numbered from least significant to most significant. 
+ SDValue In1, In2, In3, In4; + GetExpandedInteger(N->getOperand(0), In3, In4); + GetExpandedInteger(N->getOperand(1), In1, In2); + EVT HalfVT = In1.getValueType(); + + SDLoc DL(N); + unsigned Opc = N->getOpcode(); + SDValue ShAmt = N->getOperand(2); + EVT ShAmtVT = ShAmt.getValueType(); + EVT ShAmtCCVT = getSetCCResultType(ShAmtVT); + + // If the shift amount is at least half the bitwidth, swap the inputs. + unsigned HalfVTBits = HalfVT.getScalarSizeInBits(); + SDValue AndNode = DAG.getNode(ISD::AND, DL, ShAmtVT, ShAmt, + DAG.getConstant(HalfVTBits, DL, ShAmtVT)); + SDValue Cond = + DAG.getSetCC(DL, ShAmtCCVT, AndNode, DAG.getConstant(0, DL, ShAmtVT), + Opc == ISD::FSHL ? ISD::SETNE : ISD::SETEQ); + + // Expand to a pair of funnel shifts. + EVT NewShAmtVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); + SDValue NewShAmt = DAG.getAnyExtOrTrunc(ShAmt, DL, NewShAmtVT); + + SDValue Select1 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In1, In2); + SDValue Select2 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In2, In3); + SDValue Select3 = DAG.getNode(ISD::SELECT, DL, HalfVT, Cond, In3, In4); + Lo = DAG.getNode(Opc, DL, HalfVT, Select2, Select1, NewShAmt); + Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt); } void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo, @@ -4297,7 +4514,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { if (RHSLo == RHSHi) { if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { - if (RHSCST->isAllOnesValue()) { + if (RHSCST->isAllOnes()) { // Equality comparison to -1. NewLHS = DAG.getNode(ISD::AND, dl, LHSLo.getValueType(), LHSLo, LHSHi); @@ -4317,8 +4534,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // If this is a comparison of the sign bit, just look at the top part. 
// X > -1, x < 0 if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) - if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0 - (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1 + if ((CCCode == ISD::SETLT && CST->isZero()) || // X < 0 + (CCCode == ISD::SETGT && CST->isAllOnes())) { // X > -1 NewLHS = LHSHi; NewRHS = RHSHi; return; @@ -4369,9 +4586,11 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE || CCCode == ISD::SETUGE || CCCode == ISD::SETULE); - if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) || - (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) || - (LoCmpC && LoCmpC->isNullValue())))) { + // FIXME: Is the HiCmpC->isOne() here correct for + // ZeroOrNegativeOneBooleanContent. + if ((EqAllowed && (HiCmpC && HiCmpC->isZero())) || + (!EqAllowed && + ((HiCmpC && HiCmpC->isOne()) || (LoCmpC && LoCmpC->isZero())))) { // For LE / GE, if high part is known false, ignore the low part. // For LT / GT: if low part is known false, return the high part. // if high part is known true, ignore the low part. @@ -4706,6 +4925,30 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp0 = N->getOperand(0); EVT InVT = InOp0.getValueType(); + // Try and extract from a smaller type so that it eventually falls + // into the promotion code below. 
+ if (getTypeAction(InVT) == TargetLowering::TypeSplitVector || + getTypeAction(InVT) == TargetLowering::TypeLegal) { + EVT NInVT = InVT.getHalfNumVectorElementsVT(*DAG.getContext()); + unsigned NElts = NInVT.getVectorMinNumElements(); + uint64_t IdxVal = cast<ConstantSDNode>(BaseIdx)->getZExtValue(); + + SDValue Step1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NInVT, InOp0, + DAG.getConstant(alignDown(IdxVal, NElts), dl, + BaseIdx.getValueType())); + SDValue Step2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, OutVT, Step1, + DAG.getConstant(IdxVal % NElts, dl, BaseIdx.getValueType())); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Step2); + } + + // Try and extract from a widened type. + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + SDValue Ops[] = {GetWidenedVector(InOp0), BaseIdx}; + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), OutVT, Ops); + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, Ext); + } + // Promote operands and see if this is handled by target lowering, // Otherwise, use the BUILD_VECTOR approach below if (getTypeAction(InVT) == TargetLowering::TypePromoteInteger) { @@ -4873,11 +5116,46 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned NumOperands = N->getNumOperands(); + unsigned NumOutElem = NOutVT.getVectorMinNumElements(); EVT OutElemTy = NOutVT.getVectorElementType(); + if (OutVT.isScalableVector()) { + // Find the largest promoted element type for each of the operands. 
+ SDUse *MaxSizedValue = std::max_element( + N->op_begin(), N->op_end(), [](const SDValue &A, const SDValue &B) { + EVT AVT = A.getValueType().getVectorElementType(); + EVT BVT = B.getValueType().getVectorElementType(); + return AVT.getScalarSizeInBits() < BVT.getScalarSizeInBits(); + }); + EVT MaxElementVT = MaxSizedValue->getValueType().getVectorElementType(); + + // Then promote all vectors to the largest element type. + SmallVector<SDValue, 8> Ops; + for (unsigned I = 0; I < NumOperands; ++I) { + SDValue Op = N->getOperand(I); + EVT OpVT = Op.getValueType(); + if (getTypeAction(OpVT) == TargetLowering::TypePromoteInteger) + Op = GetPromotedInteger(Op); + else + assert(getTypeAction(OpVT) == TargetLowering::TypeLegal && + "Unhandled legalization type"); + + if (OpVT.getVectorElementType().getScalarSizeInBits() < + MaxElementVT.getScalarSizeInBits()) + Op = DAG.getAnyExtOrTrunc(Op, dl, + OpVT.changeVectorElementType(MaxElementVT)); + Ops.push_back(Op); + } + + // Do the CONCAT on the promoted type and finally truncate to (the promoted) + // NOutVT. + return DAG.getAnyExtOrTrunc( + DAG.getNode(ISD::CONCAT_VECTORS, dl, + OutVT.changeVectorElementType(MaxElementVT), Ops), + dl, NOutVT); + } unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); - unsigned NumOutElem = NOutVT.getVectorNumElements(); - unsigned NumOperands = N->getNumOperands(); assert(NumElem * NumOperands == NumOutElem && "Unexpected number of elements"); @@ -4957,7 +5235,17 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) { // we can simply change the result type. SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); + return DAG.getNode(N->getOpcode(), dl, NVT, N->ops()); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VP_REDUCE(SDNode *N) { + // The VP_REDUCE result size may be larger than the element size, so we can + // simply change the result type. 
However the start value and result must be + // the same. + SDLoc DL(N); + SDValue Start = PromoteIntOpVectorReduction(N, N->getOperand(0)); + return DAG.getNode(N->getOpcode(), DL, Start.getValueType(), Start, + N->getOperand(1), N->getOperand(2), N->getOperand(3)); } SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -4974,6 +5262,21 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); } +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_SUBVECTOR(SDNode *N) { + SDLoc dl(N); + // The result type is equal to the first input operand's type, so the + // type that needs promoting must be the second source vector. + SDValue V0 = N->getOperand(0); + SDValue V1 = GetPromotedInteger(N->getOperand(1)); + SDValue Idx = N->getOperand(2); + EVT PromVT = EVT::getVectorVT(*DAG.getContext(), + V1.getValueType().getVectorElementType(), + V0.getValueType().getVectorElementCount()); + V0 = DAG.getAnyExtOrTrunc(V0, dl, PromVT); + SDValue Ext = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, PromVT, V0, V1, Idx); + return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); +} + SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 05a974af3b55..1f73c9eea104 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -223,8 +223,7 @@ bool DAGTypeLegalizer::run() { #endif PerformExpensiveChecks(); - SDNode *N = Worklist.back(); - Worklist.pop_back(); + SDNode *N = Worklist.pop_back_val(); assert(N->getNodeId() == ReadyToProcess && "Node should be ready if on worklist!"); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 8d17d8fc68b1..da282ecad282 100644 --- 
a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -289,6 +289,12 @@ private: return DAG.getZeroExtendInReg(Op, DL, OldVT); } + // Promote the given operand V (vector or scalar) according to N's specific + // reduction kind. N must be an integer VECREDUCE_* or VP_REDUCE_*. Returns + // the nominal extension opcode (ISD::(ANY|ZERO|SIGN)_EXTEND) and the + // promoted value. + SDValue PromoteIntOpVectorReduction(SDNode *N, SDValue V); + // Integer Result Promotion. void PromoteIntegerResult(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo); @@ -332,14 +338,14 @@ private: SDValue PromoteIntRes_VSELECT(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); SDValue PromoteIntRes_SETCC(SDNode *N); - SDValue PromoteIntRes_SHL(SDNode *N); - SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); - SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); - SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N, bool IsVP); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N, bool IsVP); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N, bool IsVP); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N, bool IsVP); SDValue PromoteIntRes_UMINUMAX(SDNode *N); SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); - SDValue PromoteIntRes_SRA(SDNode *N); - SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N, bool IsVP); + SDValue PromoteIntRes_SRL(SDNode *N, bool IsVP); SDValue PromoteIntRes_TRUNCATE(SDNode *N); SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo); @@ -353,6 +359,7 @@ private: SDValue PromoteIntRes_DIVFIX(SDNode *N); SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); + SDValue PromoteIntRes_VP_REDUCE(SDNode *N); SDValue PromoteIntRes_ABS(SDNode *N); SDValue PromoteIntRes_Rotate(SDNode *N); SDValue PromoteIntRes_FunnelShift(SDNode *N); @@ -369,6 
+376,7 @@ private: SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntOp_INSERT_SUBVECTOR(SDNode *N); SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N); @@ -394,6 +402,7 @@ private: SDValue PromoteIntOp_FIX(SDNode *N); SDValue PromoteIntOp_FPOWI(SDNode *N); SDValue PromoteIntOp_VECREDUCE(SDNode *N); + SDValue PromoteIntOp_VP_REDUCE(SDNode *N, unsigned OpNo); SDValue PromoteIntOp_SET_ROUNDING(SDNode *N); void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); @@ -518,6 +527,7 @@ private: SDValue SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_ARITH_FENCE(SDNode *N); SDValue SoftenFloatRes_BITCAST(SDNode *N); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); SDValue SoftenFloatRes_ConstantFP(SDNode *N); @@ -816,7 +826,7 @@ private: // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
void SplitVectorResult(SDNode *N, unsigned ResNo); - void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, bool IsVP); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -898,6 +908,7 @@ private: SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); SDValue WidenVecRes_EXTEND_VECTOR_INREG(SDNode* N); SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); + SDValue WidenVecRes_INSERT_SUBVECTOR(SDNode *N); SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); @@ -912,7 +923,7 @@ private: SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_Ternary(SDNode *N); - SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_Binary(SDNode *N, bool IsVP); SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N); SDValue WidenVecRes_StrictFP(SDNode *N); @@ -972,10 +983,10 @@ private: LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper function to generate a set of stores to store a widen vector into - /// non-widen memory. + /// non-widen memory. Returns true if successful, false otherwise. /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); + bool GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The /// input vector must have the same element type as NVT. @@ -1011,6 +1022,7 @@ private: // Generic Result Splitting. 
void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); + void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 81cc2bf10d25..3d3c9a2ad837 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -571,3 +571,13 @@ void DAGTypeLegalizer::SplitRes_FREEZE(SDNode *N, SDValue &Lo, SDValue &Hi) { Lo = DAG.getNode(ISD::FREEZE, dl, L.getValueType(), L); Hi = DAG.getNode(ISD::FREEZE, dl, H.getValueType(), H); } + +void DAGTypeLegalizer::SplitRes_ARITH_FENCE(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue L, H; + SDLoc DL(N); + GetSplitOp(N->getOperand(0), L, H); + + Lo = DAG.getNode(ISD::ARITH_FENCE, DL, L.getValueType(), L); + Hi = DAG.getNode(ISD::ARITH_FENCE, DL, H.getValueType(), H); +} diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ebe3bfc4b75a..88a28a3be53e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -538,8 +538,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return RecursivelyLegalizeResults(Op, ResultVals); } -// FIXME: This is very similar to the X86 override of -// TargetLowering::LowerOperationWrapper. Can we merge them somehow? +// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we +// merge them somehow? 
bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, SmallVectorImpl<SDValue> &Results) { SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); @@ -774,8 +774,8 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { ExpandSETCC(Node, Results); return; case ISD::ABS: - if (TLI.expandABS(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandABS(Node, DAG)) { + Results.push_back(Expanded); return; } break; @@ -783,22 +783,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { ExpandBITREVERSE(Node, Results); return; case ISD::CTPOP: - if (TLI.expandCTPOP(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) { + Results.push_back(Expanded); return; } break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: - if (TLI.expandCTLZ(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) { + Results.push_back(Expanded); return; } break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: - if (TLI.expandCTTZ(Node, Tmp, DAG)) { - Results.push_back(Tmp); + if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) { + Results.push_back(Expanded); return; } break; @@ -943,10 +943,8 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { // What is the size of each element in the vector mask. EVT BitTy = MaskTy.getScalarType(); - Mask = DAG.getSelect(DL, BitTy, Mask, - DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, - BitTy), - DAG.getConstant(0, DL, BitTy)); + Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy), + DAG.getConstant(0, DL, BitTy)); // Broadcast the mask so that the entire vector is all one or all zero. 
if (VT.isFixedLengthVector()) @@ -960,9 +958,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); - SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); - SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); + SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy); Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); @@ -1099,25 +1095,45 @@ static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { EVT VT = Node->getValueType(0); + // Scalable vectors can't use shuffle expansion. + if (VT.isScalableVector()) + return TLI.expandBSWAP(Node, DAG); + // Generate a byte wise shuffle mask for the BSWAP. SmallVector<int, 16> ShuffleMask; createBSWAPShuffleMask(VT, ShuffleMask); EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); // Only emit a shuffle if the mask is legal. - if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) - return DAG.UnrollVectorOp(Node); + if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { + SDLoc DL(Node); + SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); + } - SDLoc DL(Node); - SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); - Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. 
+ if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) + return TLI.expandBSWAP(Node, DAG); + + // Otherwise unroll. + return DAG.UnrollVectorOp(Node); } void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results) { EVT VT = Node->getValueType(0); + // We can't unroll or use shuffles for scalable vectors. + if (VT.isScalableVector()) { + Results.push_back(TLI.expandBITREVERSE(Node, DAG)); + return; + } + // If we have the scalar operation, it's probably cheaper to unroll it. if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) { SDValue Tmp = DAG.UnrollVectorOp(Node); @@ -1156,9 +1172,10 @@ void VectorLegalizer::ExpandBITREVERSE(SDNode *Node, if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && TLI.isOperationLegalOrCustom(ISD::SRL, VT) && TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && - TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) - // Let LegalizeDAG handle this later. + TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) { + Results.push_back(TLI.expandBITREVERSE(Node, DAG)); return; + } // Otherwise unroll. 
SDValue Tmp = DAG.UnrollVectorOp(Node); @@ -1207,9 +1224,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); - SDValue AllOnes = DAG.getConstant( - APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT); - SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); + SDValue NotMask = DAG.getNOT(DL, Mask, VT); Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); @@ -1502,9 +1517,8 @@ void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, if (Node->getOpcode() == ISD::STRICT_FSETCC || Node->getOpcode() == ISD::STRICT_FSETCCS) ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), dl, EltVT), - DAG.getConstant(0, dl, EltVT)); + DAG.getAllOnesConstant(dl, EltVT), + DAG.getConstant(0, dl, EltVT)); OpValues.push_back(ScalarResult); OpChains.push_back(ScalarChain); @@ -1536,9 +1550,7 @@ SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), dl, EltVT), + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], DAG.getAllOnesConstant(dl, EltVT), DAG.getConstant(0, dl, EltVT)); } return DAG.getBuildVector(VT, dl, Ops); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 91242bbf866f..539c9cb9c256 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -529,7 +529,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { SDValue Arg = N->getOperand(2).getOperand(0); if (Arg.isUndef()) return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); - unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); 
+ unsigned Op = !cast<ConstantSDNode>(Arg)->isZero(); return GetScalarizedVector(N->getOperand(Op)); } @@ -1045,7 +1045,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: - SplitVecRes_BinOp(N, Lo, Hi); + SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ false); break; case ISD::FMA: case ISD::FSHL: @@ -1082,6 +1082,26 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UDIVFIXSAT: SplitVecRes_FIX(N, Lo, Hi); break; + case ISD::VP_ADD: + case ISD::VP_AND: + case ISD::VP_MUL: + case ISD::VP_OR: + case ISD::VP_SUB: + case ISD::VP_XOR: + case ISD::VP_SHL: + case ISD::VP_LSHR: + case ISD::VP_ASHR: + case ISD::VP_SDIV: + case ISD::VP_UDIV: + case ISD::VP_SREM: + case ISD::VP_UREM: + case ISD::VP_FADD: + case ISD::VP_FSUB: + case ISD::VP_FMUL: + case ISD::VP_FDIV: + case ISD::VP_FREM: + SplitVecRes_BinOp(N, Lo, Hi, /*IsVP*/ true); + break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1113,8 +1133,8 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, } } -void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, - SDValue &Hi) { +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi, + bool IsVP) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; @@ -1123,8 +1143,41 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, const SDNodeFlags Flags = N->getFlags(); unsigned Opcode = N->getOpcode(); - Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); - Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); + if (!IsVP) { + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); + return; + } + + // Split the mask. 
+ SDValue MaskLo, MaskHi; + SDValue Mask = N->getOperand(2); + EVT MaskVT = Mask.getValueType(); + if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask)); + + // Split the vector length parameter. + // %evl -> umin(%evl, %halfnumelts) and usubsat(%evl - %halfnumelts). + SDValue EVL = N->getOperand(3); + EVT VecVT = N->getValueType(0); + EVT EVLVT = EVL.getValueType(); + assert(VecVT.getVectorElementCount().isKnownEven() && + "Expecting the mask to be an evenly-sized vector"); + unsigned HalfMinNumElts = VecVT.getVectorMinNumElements() / 2; + SDValue HalfNumElts = + VecVT.isFixedLengthVector() + ? DAG.getConstant(HalfMinNumElts, dl, EVLVT) + : DAG.getVScale(dl, EVLVT, + APInt(EVLVT.getScalarSizeInBits(), HalfMinNumElts)); + SDValue EVLLo = DAG.getNode(ISD::UMIN, dl, EVLVT, EVL, HalfNumElts); + SDValue EVLHi = DAG.getNode(ISD::USUBSAT, dl, EVLVT, EVL, HalfNumElts); + + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), + {LHSLo, RHSLo, MaskLo, EVLLo}, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), + {LHSHi, RHSHi, MaskHi, EVLHi}, Flags); } void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, @@ -2985,6 +3038,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::INSERT_SUBVECTOR: + Res = WidenVecRes_INSERT_SUBVECTOR(N); + break; case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; @@ -3035,7 +3091,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: - Res = WidenVecRes_Binary(N); + 
Res = WidenVecRes_Binary(N, /*IsVP*/ false); break; case ISD::FADD: @@ -3159,6 +3215,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FSHR: Res = WidenVecRes_Ternary(N); break; + case ISD::VP_ADD: + case ISD::VP_AND: + case ISD::VP_MUL: + case ISD::VP_OR: + case ISD::VP_SUB: + case ISD::VP_XOR: + case ISD::VP_SHL: + case ISD::VP_LSHR: + case ISD::VP_ASHR: + case ISD::VP_SDIV: + case ISD::VP_UDIV: + case ISD::VP_SREM: + case ISD::VP_UREM: + case ISD::VP_FADD: + case ISD::VP_FSUB: + case ISD::VP_FMUL: + case ISD::VP_FDIV: + case ISD::VP_FREM: + // Vector-predicated binary op widening. Note that -- unlike the + // unpredicated versions -- we don't have to worry about trapping on + // operations like UDIV, FADD, etc., as we pass on the original vector + // length parameter. This means the widened elements containing garbage + // aren't active. + Res = WidenVecRes_Binary(N, /*IsVP*/ true); + break; } // If Res is null, the sub-method took care of registering the result. @@ -3176,13 +3257,31 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); } -SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N, bool IsVP) { // Binary op widening. SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); + if (!IsVP) + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, + N->getFlags()); + // For VP operations, we must also widen the mask. Note that the mask type + // may not actually need widening, leading it be split along with the VP + // operation. + // FIXME: This could lead to an infinite split/widen loop. 
We only handle the + // case where the mask needs widening to an identically-sized type as the + // vector inputs. + SDValue Mask = N->getOperand(2); + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen binary VP op"); + Mask = GetWidenedVector(Mask); + assert(Mask.getValueType().getVectorElementCount() == + WidenVT.getVectorElementCount() && + "Unable to widen binary VP op"); + return DAG.getNode(N->getOpcode(), dl, WidenVT, + {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags()); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { @@ -3527,7 +3626,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDLoc DL(N); EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); + ElementCount WidenEC = WidenVT.getVectorElementCount(); EVT InVT = InOp.getValueType(); @@ -3547,14 +3646,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { } EVT InEltVT = InVT.getVectorElementType(); - EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts); - unsigned InVTNumElts = InVT.getVectorNumElements(); + EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC); + ElementCount InVTEC = InVT.getVectorElementCount(); if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { InOp = GetWidenedVector(N->getOperand(0)); InVT = InOp.getValueType(); - InVTNumElts = InVT.getVectorNumElements(); - if (InVTNumElts == WidenNumElts) { + InVTEC = InVT.getVectorElementCount(); + if (InVTEC == WidenEC) { if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InOp); return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); @@ -3578,9 +3677,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // it an illegal type that might lead to repeatedly splitting the input // and then widening it. To avoid this, we widen the input only if // it results in a legal type. 
- if (WidenNumElts % InVTNumElts == 0) { + if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) { // Widen the input and call convert on the widened input vector. - unsigned NumConcat = WidenNumElts/InVTNumElts; + unsigned NumConcat = + WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue(); SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); Ops[0] = InOp; SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); @@ -3589,7 +3689,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); } - if (InVTNumElts % WidenNumElts == 0) { + if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) { SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, DAG.getVectorIdxConstant(0, DL)); // Extract the input and convert the shorten input vector. @@ -3601,7 +3701,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { // Otherwise unroll into some nasty scalar code and rebuild the vector. EVT EltVT = WidenVT.getVectorElementType(); - SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); + SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT)); // Use the original element count so we don't do more scalar opts than // necessary. 
unsigned MinElts = N->getValueType(0).getVectorNumElements(); @@ -3962,14 +4062,26 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { return DAG.getBuildVector(WidenVT, dl, Ops); } +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx); +} + SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); SDLoc dl(N); - if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + auto InOpTypeAction = getTypeAction(InOp.getValueType()); + if (InOpTypeAction == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); EVT InVT = InOp.getValueType(); @@ -3979,20 +4091,49 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { if (IdxVal == 0 && InVT == WidenVT) return InOp; - if (VT.isScalableVector()) - report_fatal_error("Don't know how to widen the result of " - "EXTRACT_SUBVECTOR for scalable vectors"); - // Check if we can extract from the vector. - unsigned WidenNumElts = WidenVT.getVectorNumElements(); - unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); + unsigned InNumElts = InVT.getVectorMinNumElements(); if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + if (VT.isScalableVector()) { + // Try to split the operation up into smaller extracts and concat the + // results together, e.g. 
+ // nxv6i64 extract_subvector(nxv12i64, 6) + // <-> + // nxv8i64 concat( + // nxv2i64 extract_subvector(nxv16i64, 6) + // nxv2i64 extract_subvector(nxv16i64, 8) + // nxv2i64 extract_subvector(nxv16i64, 10) + // undef) + unsigned VTNElts = VT.getVectorMinNumElements(); + unsigned GCD = greatestCommonDivisor(VTNElts, WidenNumElts); + assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " + "down type's element count"); + EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + ElementCount::getScalable(GCD)); + // Avoid recursion around e.g. nxv1i8. + if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) { + SmallVector<SDValue> Parts; + unsigned I = 0; + for (; I < VTNElts / GCD; ++I) + Parts.push_back( + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp, + DAG.getVectorIdxConstant(IdxVal + I * GCD, dl))); + for (; I < WidenNumElts / GCD; ++I) + Parts.push_back(DAG.getUNDEF(PartVT)); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); + } + + report_fatal_error("Don't know how to widen the result of " + "EXTRACT_SUBVECTOR for scalable vectors"); + } + // We could try widening the input to the right length but for now, extract // the original elements, fill the rest with undefs and build a vector. SmallVector<SDValue, 16> Ops(WidenNumElts); - EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); unsigned i; for (i = 0; i < NumElts; ++i) @@ -4037,20 +4178,55 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { else Result = GenWidenVectorLoads(LdChain, LD); - // If we generate a single load, we can use that for the chain. Otherwise, - // build a factor node to remember the multiple loads are independent and - // chain to that. - SDValue NewChain; - if (LdChain.size() == 1) - NewChain = LdChain[0]; - else - NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); + if (Result) { + // If we generate a single load, we can use that for the chain. 
Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); - // Modified the chain - switch anything that used the old chain to use - // the new one. - ReplaceValueWith(SDValue(N, 1), NewChain); + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), NewChain); - return Result; + return Result; + } + + // Generate a vector-predicated load if it is custom/legal on the target. To + // avoid possible recursion, only do this if the widened mask type is legal. + // FIXME: Not all targets may support EVL in VP_LOAD. These will have been + // removed from the IR by the ExpandVectorPredication pass but we're + // reintroducing them here. + EVT LdVT = LD->getMemoryVT(); + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WideVT.getVectorElementCount()); + if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() && + TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) && + TLI.isTypeLegal(WideMaskVT)) { + SDLoc DL(N); + SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); + MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); + unsigned NumVTElts = LdVT.getVectorMinNumElements(); + SDValue EVL = + DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + const auto *MMO = LD->getMemOperand(); + SDValue NewLoad = + DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL, + MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(), + MMO->getAAInfo()); + + // Modified the chain - switch anything that used the old chain to use + // the new one. 
+ ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1)); + + return NewLoad; + } + + report_fatal_error("Unable to widen vector load"); } SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { @@ -4351,7 +4527,7 @@ SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - unsigned WidenNumElts = WidenVT.getVectorNumElements(); + ElementCount WidenEC = WidenVT.getVectorElementCount(); SDValue Cond1 = N->getOperand(0); EVT CondVT = Cond1.getValueType(); @@ -4365,8 +4541,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { } EVT CondEltVT = CondVT.getVectorElementType(); - EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), - CondEltVT, WidenNumElts); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC); if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) Cond1 = GetWidenedVector(Cond1); @@ -4891,12 +5066,42 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { return TLI.scalarizeVectorStore(ST, DAG); SmallVector<SDValue, 16> StChain; - GenWidenVectorStores(StChain, ST); + if (GenWidenVectorStores(StChain, ST)) { + if (StChain.size() == 1) + return StChain[0]; - if (StChain.size() == 1) - return StChain[0]; - else return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); + } + + // Generate a vector-predicated store if it is custom/legal on the target. + // To avoid possible recursion, only do this if the widened mask type is + // legal. + // FIXME: Not all targets may support EVL in VP_STORE. These will have been + // removed from the IR by the ExpandVectorPredication pass but we're + // reintroducing them here. 
+ SDValue StVal = ST->getValue(); + EVT StVT = StVal.getValueType(); + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT); + EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, + WideVT.getVectorElementCount()); + if (WideVT.isScalableVector() && + TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) && + TLI.isTypeLegal(WideMaskVT)) { + // Widen the value. + SDLoc DL(N); + StVal = GetWidenedVector(StVal); + SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); + MVT EVLVT = TLI.getVPExplicitVectorLengthTy(); + unsigned NumVTElts = StVT.getVectorMinNumElements(); + SDValue EVL = + DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts)); + const auto *MMO = ST->getMemOperand(); + return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), Mask, + EVL, MMO->getPointerInfo(), MMO->getAlign(), + MMO->getFlags(), MMO->getAAInfo()); + } + + report_fatal_error("Unable to widen vector store"); } SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { @@ -5147,9 +5352,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { // Align: If 0, don't allow use of a wider type // WidenEx: If Align is not 0, the amount additional we can load/store from. 
-static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, - unsigned Width, EVT WidenVT, - unsigned Align = 0, unsigned WidenEx = 0) { +static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, + unsigned Width, EVT WidenVT, + unsigned Align = 0, unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); const bool Scalable = WidenVT.isScalableVector(); unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); @@ -5204,9 +5409,11 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, } } + // Using element-wise loads and stores for widening operations is not + // supported for scalable vectors if (Scalable) - report_fatal_error("Using element-wise loads and stores for widening " - "operations is not supported for scalable vectors"); + return None; + return RetVT; } @@ -5266,32 +5473,63 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, TypeSize WidthDiff = WidenWidth - LdWidth; // Allow wider loads if they are sufficiently aligned to avoid memory faults // and if the original load is simple. - unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment(); + unsigned LdAlign = + (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlignment(); // Find the vector type that can load from. - EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, - WidthDiff.getKnownMinSize()); - TypeSize NewVTWidth = NewVT.getSizeInBits(); - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), + Optional<EVT> FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, + WidthDiff.getKnownMinSize()); + + if (!FirstVT) + return SDValue(); + + SmallVector<EVT, 8> MemVTs; + TypeSize FirstVTWidth = FirstVT->getSizeInBits(); + + // Unless we're able to load in one instruction we must work out how to load + // the remainder. 
+ if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) { + Optional<EVT> NewVT = FirstVT; + TypeSize RemainingWidth = LdWidth; + TypeSize NewVTWidth = FirstVTWidth; + do { + RemainingWidth -= NewVTWidth; + if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) { + // The current type we are using is too large. Find a better size. + NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT, + LdAlign, WidthDiff.getKnownMinSize()); + if (!NewVT) + return SDValue(); + NewVTWidth = NewVT->getSizeInBits(); + } + MemVTs.push_back(*NewVT); + } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth)); + } + + SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(), LD->getOriginalAlign(), MMOFlags, AAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction. - if (TypeSize::isKnownLE(LdWidth, NewVTWidth)) { - if (!NewVT.isVector()) { - unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); - EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + if (MemVTs.empty()) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + if (!FirstVT->isVector()) { + unsigned NumElts = + WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); } - if (NewVT == WidenVT) + if (FirstVT == WidenVT) return LdOp; // TODO: We don't currently have any tests that exercise this code path. 
- assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0); - unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize(); + assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0); + unsigned NumConcat = + WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); SmallVector<SDValue, 16> ConcatOps(NumConcat); - SDValue UndefVal = DAG.getUNDEF(NewVT); + SDValue UndefVal = DAG.getUNDEF(*FirstVT); ConcatOps[0] = LdOp; for (unsigned i = 1; i != NumConcat; ++i) ConcatOps[i] = UndefVal; @@ -5304,28 +5542,22 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, uint64_t ScaledOffset = 0; MachinePointerInfo MPI = LD->getPointerInfo(); - do { - LdWidth -= NewVTWidth; - IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr, - &ScaledOffset); - if (TypeSize::isKnownLT(LdWidth, NewVTWidth)) { - // The current type we are using is too large. Find a better size. - NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, - WidthDiff.getKnownMinSize()); - NewVTWidth = NewVT.getSizeInBits(); - } + // First incremement past the first load. + IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr, + &ScaledOffset); + for (EVT MemVT : MemVTs) { Align NewAlign = ScaledOffset == 0 ? LD->getOriginalAlign() : commonAlignment(LD->getAlign(), ScaledOffset); SDValue L = - DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo); - LdChain.push_back(L.getValue(1)); + DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo); LdOps.push_back(L); - LdOp = L; - } while (TypeSize::isKnownGT(LdWidth, NewVTWidth)); + LdChain.push_back(L.getValue(1)); + IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset); + } // Build the vector from the load operations. 
unsigned End = LdOps.size(); @@ -5447,7 +5679,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, return DAG.getBuildVector(WidenVT, dl, Ops); } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, +bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store power-of-two widths. // The routine chops the vector into the largest vector stores with the same @@ -5473,9 +5705,30 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, MachinePointerInfo MPI = ST->getPointerInfo(); uint64_t ScaledOffset = 0; + + // A breakdown of how to widen this vector store. Each element of the vector + // is a memory VT combined with the number of times it is to be stored to, + // e,g., v5i32 -> {{v2i32,2},{i32,1}} + SmallVector<std::pair<EVT, unsigned>, 4> MemVTs; + while (StWidth.isNonZero()) { // Find the largest vector type we can store with. 
- EVT NewVT = FindMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + Optional<EVT> NewVT = + findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + if (!NewVT) + return false; + MemVTs.push_back({*NewVT, 0}); + TypeSize NewVTWidth = NewVT->getSizeInBits(); + + do { + StWidth -= NewVTWidth; + MemVTs.back().second++; + } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } + + for (const auto &Pair : MemVTs) { + EVT NewVT = Pair.first; + unsigned Count = Pair.second; TypeSize NewVTWidth = NewVT.getSizeInBits(); if (NewVT.isVector()) { @@ -5490,12 +5743,10 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, MMOFlags, AAInfo); StChain.push_back(PartStore); - StWidth -= NewVTWidth; Idx += NumVTElts; - IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr, &ScaledOffset); - } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } while (--Count); } else { // Cast the vector to the scalar type we can store. unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize(); @@ -5511,13 +5762,14 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, MMOFlags, AAInfo); StChain.push_back(PartStore); - StWidth -= NewVTWidth; IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr); - } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth)); + } while (--Count); // Restore index back to be relative to the original widen element type. Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth; } } + + return true; } /// Modifies a vector input (widen or narrows) to a vector of NVT. The diff --git a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 75b4242a415c..f64b332a7fef 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -192,7 +192,7 @@ public: // Returns the SDNodes which this SDDbgValue depends on. 
SmallVector<SDNode *> getSDNodes() const { SmallVector<SDNode *> Dependencies; - for (SDDbgOperand DbgOp : getLocationOps()) + for (const SDDbgOperand &DbgOp : getLocationOps()) if (DbgOp.getKind() == SDDbgOperand::SDNODE) Dependencies.push_back(DbgOp.getSDNode()); for (SDNode *Node : getAdditionalDependencies()) diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 0022e5ec31f0..1b89864116cb 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -56,9 +56,7 @@ namespace { SUnit *pop() { if (empty()) return nullptr; - SUnit *V = Queue.back(); - Queue.pop_back(); - return V; + return Queue.pop_back_val(); } }; diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b2a8c8bdd78c..95f7e43b151d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -384,13 +384,12 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // There are either zero or one users of the Glue result. bool HasGlueUse = false; - for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); - UI != E; ++UI) - if (GlueVal.isOperandOf(*UI)) { + for (SDNode *U : N->uses()) + if (GlueVal.isOperandOf(U)) { HasGlueUse = true; assert(N->getNodeId() == -1 && "Node already inserted!"); N->setNodeId(NodeSUnit->NodeNum); - N = *UI; + N = U; if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) NodeSUnit->isCall = true; break; @@ -742,7 +741,7 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, /// Returns true if \p DV has any VReg operand locations which don't exist in /// VRBaseMap. 
auto HasUnknownVReg = [&VRBaseMap](SDDbgValue *DV) { - for (SDDbgOperand L : DV->getLocationOps()) { + for (const SDDbgOperand &L : DV->getLocationOps()) { if (L.getKind() == SDDbgOperand::SDNODE && VRBaseMap.count({L.getSDNode(), L.getResNo()}) == 0) return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2a98464425c4..008665d50233 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -175,7 +176,7 @@ bool ISD::isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly) { if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) { APInt SplatVal; - return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnesValue(); + return isConstantSplatVector(N, SplatVal) && SplatVal.isAllOnes(); } if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -224,7 +225,7 @@ bool ISD::isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly) { if (!BuildVectorOnly && N->getOpcode() == ISD::SPLAT_VECTOR) { APInt SplatVal; - return isConstantSplatVector(N, SplatVal) && SplatVal.isNullValue(); + return isConstantSplatVector(N, SplatVal) && SplatVal.isZero(); } if (N->getOpcode() != ISD::BUILD_VECTOR) return false; @@ -412,6 +413,28 @@ bool ISD::isVPOpcode(unsigned Opcode) { } } +bool ISD::isVPBinaryOp(unsigned Opcode) { + switch (Opcode) { + default: + return false; +#define PROPERTY_VP_BINARYOP_SDNODE(SDOPC) \ + case ISD::SDOPC: \ + return true; +#include "llvm/IR/VPIntrinsics.def" + } +} + +bool ISD::isVPReduction(unsigned Opcode) { + switch (Opcode) { + default: + return false; +#define 
PROPERTY_VP_REDUCTION_SDNODE(SDOPC) \ + case ISD::SDOPC: \ + return true; +#include "llvm/IR/VPIntrinsics.def" + } +} + /// The operand position of the vector mask. Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { switch (Opcode) { @@ -683,6 +706,34 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::VP_LOAD: { + const VPLoadSDNode *ELD = cast<VPLoadSDNode>(N); + ID.AddInteger(ELD->getMemoryVT().getRawBits()); + ID.AddInteger(ELD->getRawSubclassData()); + ID.AddInteger(ELD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::VP_STORE: { + const VPStoreSDNode *EST = cast<VPStoreSDNode>(N); + ID.AddInteger(EST->getMemoryVT().getRawBits()); + ID.AddInteger(EST->getRawSubclassData()); + ID.AddInteger(EST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::VP_GATHER: { + const VPGatherSDNode *EG = cast<VPGatherSDNode>(N); + ID.AddInteger(EG->getMemoryVT().getRawBits()); + ID.AddInteger(EG->getRawSubclassData()); + ID.AddInteger(EG->getPointerInfo().getAddrSpace()); + break; + } + case ISD::VP_SCATTER: { + const VPScatterSDNode *ES = cast<VPScatterSDNode>(N); + ID.AddInteger(ES->getMemoryVT().getRawBits()); + ID.AddInteger(ES->getRawSubclassData()); + ID.AddInteger(ES->getPointerInfo().getAddrSpace()); + break; + } case ISD::MLOAD: { const MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); ID.AddInteger(MLD->getMemoryVT().getRawBits()); @@ -1319,10 +1370,7 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { - EVT EltVT = VT.getScalarType(); - SDValue NegOne = - getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); - return getNode(ISD::XOR, DL, VT, Val, NegOne); + return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT)); } SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) { @@ -1901,7 +1949,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, if (SameNumElts) return N1; if (auto *C = dyn_cast<ConstantSDNode>(Splat)) - if (C->isNullValue()) + if (C->isZero()) return N1; } @@ -2265,19 +2313,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { const APInt &C1 = N1C->getAPIntValue(); - switch (Cond) { - default: llvm_unreachable("Unknown integer setcc!"); - case ISD::SETEQ: return getBoolConstant(C1 == C2, dl, VT, OpVT); - case ISD::SETNE: return getBoolConstant(C1 != C2, dl, VT, OpVT); - case ISD::SETULT: return getBoolConstant(C1.ult(C2), dl, VT, OpVT); - case ISD::SETUGT: return getBoolConstant(C1.ugt(C2), dl, VT, OpVT); - case ISD::SETULE: return getBoolConstant(C1.ule(C2), dl, VT, OpVT); - case ISD::SETUGE: return getBoolConstant(C1.uge(C2), dl, VT, OpVT); - case ISD::SETLT: return getBoolConstant(C1.slt(C2), dl, VT, OpVT); - case ISD::SETGT: return getBoolConstant(C1.sgt(C2), dl, VT, OpVT); - case ISD::SETLE: return getBoolConstant(C1.sle(C2), dl, VT, OpVT); - case ISD::SETGE: return getBoolConstant(C1.sge(C2), dl, VT, OpVT); - } + return getBoolConstant(ICmpInst::compare(C1, C2, getICmpCondCode(Cond)), + dl, VT, OpVT); } } @@ -2380,7 +2417,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { return SDValue(); APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? 
APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return GetDemandedBits(V, DemandedBits, DemandedElts); } @@ -2475,7 +2512,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, switch (V.getOpcode()) { case ISD::SPLAT_VECTOR: UndefElts = V.getOperand(0).isUndef() - ? APInt::getAllOnesValue(DemandedElts.getBitWidth()) + ? APInt::getAllOnes(DemandedElts.getBitWidth()) : APInt(DemandedElts.getBitWidth(), 0); return true; case ISD::ADD: @@ -2507,7 +2544,7 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, unsigned NumElts = VT.getVectorNumElements(); assert(NumElts == DemandedElts.getBitWidth() && "Vector size mismatch"); - UndefElts = APInt::getNullValue(NumElts); + UndefElts = APInt::getZero(NumElts); switch (V.getOpcode()) { case ISD::BUILD_VECTOR: { @@ -2576,7 +2613,7 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) { // For now we don't support this with scalable vectors. if (!VT.isScalableVector()) - DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); return isSplatValue(V, DemandedElts, UndefElts) && (AllowUndefs || !UndefElts); } @@ -2592,7 +2629,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { APInt DemandedElts; if (!VT.isScalableVector()) - DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); + DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); if (isSplatValue(V, DemandedElts, UndefElts)) { if (VT.isScalableVector()) { @@ -2740,7 +2777,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { } APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? 
APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return computeKnownBits(Op, DemandedElts, Depth); } @@ -2878,7 +2915,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); Known.One.setAllBits(); Known.Zero.setAllBits(); @@ -2965,11 +3002,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // bits from the overlapping larger input elements and extracting the // sub sections we actually care about. unsigned SubScale = SubBitWidth / BitWidth; - APInt SubDemandedElts(NumElts / SubScale, 0); - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SubDemandedElts.setBit(i / SubScale); - + APInt SubDemandedElts = + APIntOps::ScaleBitMask(DemandedElts, NumElts / SubScale); Known2 = computeKnownBits(N0, SubDemandedElts, Depth + 1); Known.Zero.setAllBits(); Known.One.setAllBits(); @@ -3415,7 +3449,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // If we know the element index, just demand that vector element, else for // an unknown element index, ignore DemandedElts and demand them all. - APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = @@ -3647,6 +3681,12 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { })) return true; + // Is the operand of a splat vector a constant power of two? 
+ if (Val.getOpcode() == ISD::SPLAT_VECTOR) + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val->getOperand(0))) + if (C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2()) + return true; + // More could be done here, though the above checks are enough // to handle some common cases. @@ -3663,7 +3703,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { return 1; APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return ComputeNumSignBits(Op, DemandedElts, Depth); } @@ -3771,10 +3811,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, assert(VT.isVector() && "Expected bitcast to vector"); unsigned Scale = SrcBits / VTBits; - APInt SrcDemandedElts(NumElts / Scale, 0); - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SrcDemandedElts.setBit(i / Scale); + APInt SrcDemandedElts = + APIntOps::ScaleBitMask(DemandedElts, NumElts / Scale); // Fast case - sign splat can be simply split across the small elements. Tmp = ComputeNumSignBits(N0, SrcDemandedElts, Depth + 1); @@ -3946,13 +3984,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Special case decrementing a value (ADD X, -1): if (ConstantSDNode *CRHS = isConstOrConstSplat(Op.getOperand(1), DemandedElts)) - if (CRHS->isAllOnesValue()) { + if (CRHS->isAllOnes()) { KnownBits Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnes()) return VTBits; // If we are subtracting one from a positive number, there is no carry @@ -3971,12 +4009,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // Handle NEG. 
if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0), DemandedElts)) - if (CLHS->isNullValue()) { + if (CLHS->isZero()) { KnownBits Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. - if ((Known.Zero | 1).isAllOnesValue()) + if ((Known.Zero | 1).isAllOnes()) return VTBits; // If the input is known to be positive (the sign bit is known clear), @@ -4080,7 +4118,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If we know the element index, just demand that vector element, else for // an unknown element index, ignore DemandedElts and demand them all. - APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = @@ -4126,7 +4164,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); Tmp = std::numeric_limits<unsigned>::max(); if (!!DemandedSubElts) { @@ -4248,6 +4286,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return std::max(FirstAnswer, Mask.countLeadingOnes()); } +unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, unsigned Depth) const { + unsigned SignBits = ComputeNumSignBits(Op, Depth); + return Op.getScalarValueSizeInBits() - SignBits + 1; +} + +unsigned SelectionDAG::ComputeMinSignedBits(SDValue Op, + const APInt &DemandedElts, + unsigned Depth) const { + unsigned SignBits = ComputeNumSignBits(Op, DemandedElts, Depth); + return 
Op.getScalarValueSizeInBits() - SignBits + 1; +} + bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, unsigned Depth) const { // Early out for FREEZE. @@ -4260,7 +4310,7 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, bool PoisonOnly, return false; APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return isGuaranteedNotToBeUndefOrPoison(Op, DemandedElts, PoisonOnly, Depth); } @@ -4285,7 +4335,17 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, case ISD::UNDEF: return PoisonOnly; - // TODO: ISD::BUILD_VECTOR handling + case ISD::BUILD_VECTOR: + // NOTE: BUILD_VECTOR has implicit truncation of wider scalar elements - + // this shouldn't affect the result. + for (unsigned i = 0, e = Op.getNumOperands(); i < e; ++i) { + if (!DemandedElts[i]) + continue; + if (!isGuaranteedNotToBeUndefOrPoison(Op.getOperand(i), PoisonOnly, + Depth + 1)) + return false; + } + return true; // TODO: Search for noundef attributes from library functions. @@ -4449,8 +4509,8 @@ bool SelectionDAG::isKnownNeverZero(SDValue Op) const { "Floating point types unsupported - use isKnownNeverZeroFloat"); // If the value is a constant, we can obviously see if it is a zero or not. - if (ISD::matchUnaryPredicate( - Op, [](ConstantSDNode *C) { return !C->isNullValue(); })) + if (ISD::matchUnaryPredicate(Op, + [](ConstantSDNode *C) { return !C->isZero(); })) return true; // TODO: Recognize more cases here. 
@@ -4490,7 +4550,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { static SDValue FoldSTEP_VECTOR(const SDLoc &DL, EVT VT, SDValue Step, SelectionDAG &DAG) { - if (cast<ConstantSDNode>(Step)->isNullValue()) + if (cast<ConstantSDNode>(Step)->isZero()) return DAG.getConstant(0, DL, VT); return SDValue(); @@ -4676,7 +4736,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { APFloat apf(EVTToAPFloatSemantics(VT), - APInt::getNullValue(VT.getSizeInBits())); + APInt::getZero(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); @@ -4828,7 +4888,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::CTTZ_ZERO_UNDEF: case ISD::CTPOP: { SDValue Ops = {Operand}; - if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + if (SDValue Fold = FoldConstantArithmetic(Opcode, DL, VT, Ops)) return Fold; } } @@ -4976,6 +5036,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, } if (OpOpcode == ISD::UNDEF) return getUNDEF(VT); + if (OpOpcode == ISD::VSCALE && !NewNodesMustHaveLegalTypes) + return getVScale(DL, VT, Operand.getConstantOperandAPInt(0)); break; case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: @@ -5206,173 +5268,111 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::CONCAT_VECTORS) return SDValue(); - // For now, the array Ops should only contain two values. - // This enforcement will be removed once this function is merged with - // FoldConstantVectorArithmetic - if (Ops.size() != 2) + unsigned NumOps = Ops.size(); + if (NumOps == 0) return SDValue(); if (isUndef(Opcode, Ops)) return getUNDEF(VT); - SDNode *N1 = Ops[0].getNode(); - SDNode *N2 = Ops[1].getNode(); - // Handle the case of two scalars. 
- if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) { - if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) { - if (C1->isOpaque() || C2->isOpaque()) - return SDValue(); - - Optional<APInt> FoldAttempt = - FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); - if (!FoldAttempt) - return SDValue(); - - SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); - assert((!Folded || !VT.isVector()) && - "Can't fold vectors ops with scalar operands"); - return Folded; - } - } + if (NumOps == 2) { + // TODO: Move foldConstantFPMath here? - // fold (add Sym, c) -> Sym+c - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1)) - return FoldSymbolOffset(Opcode, VT, GA, N2); - if (TLI->isCommutativeBinOp(Opcode)) - if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2)) - return FoldSymbolOffset(Opcode, VT, GA, N1); + if (auto *C1 = dyn_cast<ConstantSDNode>(Ops[0])) { + if (auto *C2 = dyn_cast<ConstantSDNode>(Ops[1])) { + if (C1->isOpaque() || C2->isOpaque()) + return SDValue(); - // For fixed width vectors, extract each constant element and fold them - // individually. Either input may be an undef value. - bool IsBVOrSV1 = N1->getOpcode() == ISD::BUILD_VECTOR || - N1->getOpcode() == ISD::SPLAT_VECTOR; - if (!IsBVOrSV1 && !N1->isUndef()) - return SDValue(); - bool IsBVOrSV2 = N2->getOpcode() == ISD::BUILD_VECTOR || - N2->getOpcode() == ISD::SPLAT_VECTOR; - if (!IsBVOrSV2 && !N2->isUndef()) - return SDValue(); - // If both operands are undef, that's handled the same way as scalars. 
- if (!IsBVOrSV1 && !IsBVOrSV2) - return SDValue(); + Optional<APInt> FoldAttempt = + FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); + if (!FoldAttempt) + return SDValue(); - EVT SVT = VT.getScalarType(); - EVT LegalSVT = SVT; - if (NewNodesMustHaveLegalTypes && LegalSVT.isInteger()) { - LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); - if (LegalSVT.bitsLT(SVT)) - return SDValue(); - } - - SmallVector<SDValue, 4> Outputs; - unsigned NumOps = 0; - if (IsBVOrSV1) - NumOps = std::max(NumOps, N1->getNumOperands()); - if (IsBVOrSV2) - NumOps = std::max(NumOps, N2->getNumOperands()); - assert(NumOps != 0 && "Expected non-zero operands"); - // Scalable vectors should only be SPLAT_VECTOR or UNDEF here. We only need - // one iteration for that. - assert((!VT.isScalableVector() || NumOps == 1) && - "Scalable vector should only have one scalar"); - - for (unsigned I = 0; I != NumOps; ++I) { - // We can have a fixed length SPLAT_VECTOR and a BUILD_VECTOR so we need - // to use operand 0 of the SPLAT_VECTOR for each fixed element. - SDValue V1; - if (N1->getOpcode() == ISD::BUILD_VECTOR) - V1 = N1->getOperand(I); - else if (N1->getOpcode() == ISD::SPLAT_VECTOR) - V1 = N1->getOperand(0); - else - V1 = getUNDEF(SVT); - - SDValue V2; - if (N2->getOpcode() == ISD::BUILD_VECTOR) - V2 = N2->getOperand(I); - else if (N2->getOpcode() == ISD::SPLAT_VECTOR) - V2 = N2->getOperand(0); - else - V2 = getUNDEF(SVT); - - if (SVT.isInteger()) { - if (V1.getValueType().bitsGT(SVT)) - V1 = getNode(ISD::TRUNCATE, DL, SVT, V1); - if (V2.getValueType().bitsGT(SVT)) - V2 = getNode(ISD::TRUNCATE, DL, SVT, V2); + SDValue Folded = getConstant(FoldAttempt.getValue(), DL, VT); + assert((!Folded || !VT.isVector()) && + "Can't fold vectors ops with scalar operands"); + return Folded; + } } - if (V1.getValueType() != SVT || V2.getValueType() != SVT) - return SDValue(); - - // Fold one vector element. 
- SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2); - if (LegalSVT != SVT) - ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); - - // Scalar folding only succeeded if the result is a constant or UNDEF. - if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant && - ScalarResult.getOpcode() != ISD::ConstantFP) - return SDValue(); - Outputs.push_back(ScalarResult); - } - - if (N1->getOpcode() == ISD::BUILD_VECTOR || - N2->getOpcode() == ISD::BUILD_VECTOR) { - assert(VT.getVectorNumElements() == Outputs.size() && - "Vector size mismatch!"); - - // Build a big vector out of the scalar elements we generated. - return getBuildVector(VT, SDLoc(), Outputs); + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[0])) + return FoldSymbolOffset(Opcode, VT, GA, Ops[1].getNode()); + if (TLI->isCommutativeBinOp(Opcode)) + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ops[1])) + return FoldSymbolOffset(Opcode, VT, GA, Ops[0].getNode()); } - assert((N1->getOpcode() == ISD::SPLAT_VECTOR || - N2->getOpcode() == ISD::SPLAT_VECTOR) && - "One operand should be a splat vector"); - - assert(Outputs.size() == 1 && "Vector size mismatch!"); - return getSplatVector(VT, SDLoc(), Outputs[0]); -} - -// TODO: Merge with FoldConstantArithmetic -SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, - const SDLoc &DL, EVT VT, - ArrayRef<SDValue> Ops, - const SDNodeFlags Flags) { - // If the opcode is a target-specific ISD node, there's nothing we can - // do here and the operand rules may not line up with the below, so - // bail early. - if (Opcode >= ISD::BUILTIN_OP_END) - return SDValue(); - - if (isUndef(Opcode, Ops)) - return getUNDEF(VT); - - // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? + // This is for vector folding only from here on. 
if (!VT.isVector()) return SDValue(); ElementCount NumElts = VT.getVectorElementCount(); + // See if we can fold through bitcasted integer ops. + // TODO: Can we handle undef elements? + if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() && + Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && + Ops[0].getOpcode() == ISD::BITCAST && + Ops[1].getOpcode() == ISD::BITCAST) { + SDValue N1 = peekThroughBitcasts(Ops[0]); + SDValue N2 = peekThroughBitcasts(Ops[1]); + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV2 = dyn_cast<BuildVectorSDNode>(N2); + EVT BVVT = N1.getValueType(); + if (BV1 && BV2 && BVVT.isInteger() && BVVT == N2.getValueType()) { + bool IsLE = getDataLayout().isLittleEndian(); + unsigned EltBits = VT.getScalarSizeInBits(); + SmallVector<APInt> RawBits1, RawBits2; + BitVector UndefElts1, UndefElts2; + if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) && + BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) && + UndefElts1.none() && UndefElts2.none()) { + SmallVector<APInt> RawBits; + for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) { + Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]); + if (!Fold) + break; + RawBits.push_back(Fold.getValue()); + } + if (RawBits.size() == NumElts.getFixedValue()) { + // We have constant folded, but we need to cast this again back to + // the original (possibly legalized) type. 
+ SmallVector<APInt> DstBits; + BitVector DstUndefs; + BuildVectorSDNode::recastRawBits(IsLE, BVVT.getScalarSizeInBits(), + DstBits, RawBits, DstUndefs, + BitVector(RawBits.size(), false)); + EVT BVEltVT = BV1->getOperand(0).getValueType(); + unsigned BVEltBits = BVEltVT.getSizeInBits(); + SmallVector<SDValue> Ops(DstBits.size(), getUNDEF(BVEltVT)); + for (unsigned I = 0, E = DstBits.size(); I != E; ++I) { + if (DstUndefs[I]) + continue; + Ops[I] = getConstant(DstBits[I].sextOrSelf(BVEltBits), DL, BVEltVT); + } + return getBitcast(VT, getBuildVector(BVVT, DL, Ops)); + } + } + } + } + auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) { return !Op.getValueType().isVector() || Op.getValueType().getVectorElementCount() == NumElts; }; - auto IsConstantBuildVectorSplatVectorOrUndef = [](const SDValue &Op) { - APInt SplatVal; - BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); + auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) { return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE || - (BV && BV->isConstant()) || - (Op.getOpcode() == ISD::SPLAT_VECTOR && - ISD::isConstantSplatVector(Op.getNode(), SplatVal)); + Op.getOpcode() == ISD::BUILD_VECTOR || + Op.getOpcode() == ISD::SPLAT_VECTOR; }; // All operands must be vector types with the same number of elements as - // the result type and must be either UNDEF or a build vector of constant + // the result type and must be either UNDEF or a build/splat vector // or UNDEF scalars. - if (!llvm::all_of(Ops, IsConstantBuildVectorSplatVectorOrUndef) || + if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) || !llvm::all_of(Ops, IsScalarOrSameVectorSize)) return SDValue(); @@ -5392,17 +5392,16 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, // For scalable vector types we know we're dealing with SPLAT_VECTORs. We // only have one operand to check. For fixed-length vector types we may have // a combination of BUILD_VECTOR and SPLAT_VECTOR. 
- unsigned NumOperands = NumElts.isScalable() ? 1 : NumElts.getFixedValue(); + unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue(); // Constant fold each scalar lane separately. SmallVector<SDValue, 4> ScalarResults; - for (unsigned I = 0; I != NumOperands; I++) { + for (unsigned I = 0; I != NumVectorElts; I++) { SmallVector<SDValue, 4> ScalarOps; for (SDValue Op : Ops) { EVT InSVT = Op.getValueType().getScalarType(); if (Op.getOpcode() != ISD::BUILD_VECTOR && Op.getOpcode() != ISD::SPLAT_VECTOR) { - // We've checked that this is UNDEF or a constant of some kind. if (Op.isUndef()) ScalarOps.push_back(getUNDEF(InSVT)); else @@ -5423,7 +5422,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, } // Constant fold the scalar operands. - SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps); // Legalize the (integer) scalar constant if necessary. if (LegalSVT != SVT) @@ -5591,9 +5590,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType() == VT && "Binary operator types must match!"); // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's // worth handling here. - if (N2C && N2C->isNullValue()) + if (N2C && N2C->isZero()) return N2; - if (N2C && N2C->isAllOnesValue()) // X & -1 -> X + if (N2C && N2C->isAllOnes()) // X & -1 -> X return N1; break; case ISD::OR: @@ -5605,7 +5604,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, N1.getValueType() == VT && "Binary operator types must match!"); // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so // it's worth handling here. 
- if (N2C && N2C->isNullValue()) + if (N2C && N2C->isZero()) return N1; if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() && VT.getVectorElementType() == MVT::i1) @@ -5711,7 +5710,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // size of the value, the shift/rotate count is guaranteed to be zero. if (VT == MVT::i1) return N1; - if (N2C && N2C->isNullValue()) + if (N2C && N2C->isZero()) return N1; break; case ISD::FP_ROUND: @@ -6086,7 +6085,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; // Vector constant folding. SDValue Ops[] = {N1, N2, N3}; - if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) { + if (SDValue V = FoldConstantArithmetic(Opcode, DL, VT, Ops)) { NewSDValueDbgMsg(V, "New node vector constant folding: ", this); return V; } @@ -6099,6 +6098,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); + case ISD::VECTOR_SPLICE: { + if (cast<ConstantSDNode>(N3)->isNullValue()) + return N1; + break; + } case ISD::INSERT_VECTOR_ELT: { ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3); // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF, except @@ -6214,9 +6218,8 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { ArgChains.push_back(Chain); // Add a chain value for each stack argument. 
- for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), - UE = getEntryNode().getNode()->use_end(); U != UE; ++U) - if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + for (SDNode *U : getEntryNode().getNode()->uses()) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) if (FI->getIndex() < 0) ArgChains.push_back(SDValue(L, 1)); @@ -6720,7 +6723,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, if (FI && !MFI.isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; bool IsZeroVal = - isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); + isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isZero(); if (!TLI.findOptimalMemOpLowering( MemOps, TLI.getMaxStoresPerMemset(OptSize), MemOp::Set(Size, DstAlignCanChange, Alignment, IsZeroVal, isVol), @@ -6809,7 +6812,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (ConstantSize) { // Memcpy with size zero? Just return the original chain. - if (ConstantSize->isNullValue()) + if (ConstantSize->isZero()) return Chain; SDValue Result = getMemcpyLoadsAndStores( @@ -6924,7 +6927,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst, ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (ConstantSize) { // Memmove with size zero? Just return the original chain. - if (ConstantSize->isNullValue()) + if (ConstantSize->isZero()) return Chain; SDValue Result = getMemmoveLoadsAndStores( @@ -7026,7 +7029,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); if (ConstantSize) { // Memset with size zero? Just return the original chain. 
- if (ConstantSize->isNullValue()) + if (ConstantSize->isZero()) return Chain; SDValue Result = getMemsetStores(*this, dl, Chain, Dst, Src, @@ -7618,6 +7621,374 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl, return V; } +SDValue SelectionDAG::getLoadVP( + ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, const SDLoc &dl, + SDValue Chain, SDValue Ptr, SDValue Offset, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT MemVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, const AAMDNodes &AAInfo, + const MDNode *Ranges, bool IsExpanding) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOLoad; + assert((MMOFlags & MachineMemOperand::MOStore) == 0); + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset); + + uint64_t Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize()); + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, + Alignment, AAInfo, Ranges); + return getLoadVP(AM, ExtType, VT, dl, Chain, Ptr, Offset, Mask, EVL, MemVT, + MMO, IsExpanding); +} + +SDValue SelectionDAG::getLoadVP(ISD::MemIndexedMode AM, + ISD::LoadExtType ExtType, EVT VT, + const SDLoc &dl, SDValue Chain, SDValue Ptr, + SDValue Offset, SDValue Mask, SDValue EVL, + EVT MemVT, MachineMemOperand *MMO, + bool IsExpanding) { + if (VT == MemVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == MemVT && "Non-extending load from different memory type!"); + } else { + // Extending load. 
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be an extending load, not truncating!"); + assert(VT.isInteger() == MemVT.isInteger() && + "Cannot convert from FP to Int or Int -> FP!"); + assert(VT.isVector() == MemVT.isVector() && + "Cannot use an ext load to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorElementCount() == MemVT.getVectorElementCount()) && + "Cannot use an ext load to change the number of vector elements!"); + } + + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.isUndef()) && "Unindexed load with an offset!"); + + SDVTList VTs = Indexed ? getVTList(VT, Ptr.getValueType(), MVT::Other) + : getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Ptr, Offset, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_LOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPLoadSDNode>( + dl.getIROrder(), VTs, AM, ExtType, IsExpanding, MemVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPLoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<VPLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, AM, + ExtType, IsExpanding, MemVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getLoadVP(EVT VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, + MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, const MDNode *Ranges, + bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + Mask, EVL, PtrInfo, VT, Alignment, MMOFlags, AAInfo, Ranges, + IsExpanding); +} + +SDValue SelectionDAG::getLoadVP(EVT 
VT, const SDLoc &dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachineMemOperand *MMO, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoadVP(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + Mask, EVL, VT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, EVT MemVT, + MaybeAlign Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, + EVL, PtrInfo, MemVT, Alignment, MMOFlags, AAInfo, nullptr, + IsExpanding); +} + +SDValue SelectionDAG::getExtLoadVP(ISD::LoadExtType ExtType, const SDLoc &dl, + EVT VT, SDValue Chain, SDValue Ptr, + SDValue Mask, SDValue EVL, EVT MemVT, + MachineMemOperand *MMO, bool IsExpanding) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoadVP(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, Mask, + EVL, MemVT, MMO, IsExpanding); +} + +SDValue SelectionDAG::getIndexedLoadVP(SDValue OrigLoad, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + auto *LD = cast<VPLoadSDNode>(OrigLoad); + assert(LD->getOffset().isUndef() && "Load is already a indexed load!"); + // Don't propagate the invariant or dereferenceable flags. 
+ auto MMOFlags = + LD->getMemOperand()->getFlags() & + ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable); + return getLoadVP(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, + LD->getChain(), Base, Offset, LD->getMask(), + LD->getVectorLength(), LD->getPointerInfo(), + LD->getMemoryVT(), LD->getAlign(), MMOFlags, LD->getAAInfo(), + nullptr, LD->isExpandingLoad()); +} + +SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachinePointerInfo PtrInfo, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); + + MachineFunction &MF = getMachineFunction(); + uint64_t Size = + MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize()); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo); + return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing); +} + +SDValue SelectionDAG::getStoreVP(SDValue Chain, const SDLoc &dl, SDValue Val, + SDValue Ptr, SDValue Mask, SDValue EVL, + MachineMemOperand *MMO, bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( + dl.getIROrder(), VTs, ISD::UNINDEXED, false, IsCompressing, VT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { 
+ cast<VPStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = + newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, IsCompressing, VT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, + SDValue Val, SDValue Ptr, SDValue Mask, + SDValue EVL, MachinePointerInfo PtrInfo, + EVT SVT, Align Alignment, + MachineMemOperand::Flags MMOFlags, + const AAMDNodes &AAInfo, + bool IsCompressing) { + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + + MMOFlags |= MachineMemOperand::MOStore; + assert((MMOFlags & MachineMemOperand::MOLoad) == 0); + + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = MF.getMachineMemOperand( + PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()), + Alignment, AAInfo); + return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO, + IsCompressing); +} + +SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl, + SDValue Val, SDValue Ptr, SDValue Mask, + SDValue EVL, EVT SVT, + MachineMemOperand *MMO, + bool IsCompressing) { + EVT VT = Val.getValueType(); + + assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); + if (VT == SVT) + return getStoreVP(Chain, dl, Val, Ptr, Mask, EVL, MMO, IsCompressing); + + assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be a truncating store, not extending!"); + assert(VT.isInteger() == SVT.isInteger() && "Can't do FP-INT conversion!"); + assert(VT.isVector() == SVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorElementCount() == SVT.getVectorElementCount()) && + "Cannot use trunc store to change the number of 
vector elements!"); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = {Chain, Val, Ptr, Undef, Mask, EVL}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); + ID.AddInteger(SVT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPStoreSDNode>( + dl.getIROrder(), VTs, ISD::UNINDEXED, true, IsCompressing, SVT, MMO)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = + newSDNode<VPStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, IsCompressing, SVT, MMO); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getIndexedStoreVP(SDValue OrigStore, const SDLoc &dl, + SDValue Base, SDValue Offset, + ISD::MemIndexedMode AM) { + auto *ST = cast<VPStoreSDNode>(OrigStore); + assert(ST->getOffset().isUndef() && "Store is already an indexed store!"); + SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); + SDValue Ops[] = {ST->getChain(), ST->getValue(), Base, + Offset, ST->getMask(), ST->getVectorLength()}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_STORE, VTs, Ops); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) + return SDValue(E, 0); + + auto *N = newSDNode<VPStoreSDNode>( + dl.getIROrder(), dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), + ST->isCompressingStore(), ST->getMemoryVT(), ST->getMemOperand()); + createOperands(N, Ops); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue 
SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { + assert(Ops.size() == 6 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_GATHER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPGatherSDNode>( + dl.getIROrder(), VTs, VT, MMO, IndexType)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPGatherSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + auto *N = newSDNode<VPGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + VT, MMO, IndexType); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorElementCount() == + N->getValueType(0).getVectorElementCount() && + "Vector width mismatch between mask and data"); + assert(N->getIndex().getValueType().getVectorElementCount().isScalable() == + N->getValueType(0).getVectorElementCount().isScalable() && + "Scalable flags of index and data do not match"); + assert(ElementCount::isKnownGE( + N->getIndex().getValueType().getVectorElementCount(), + N->getValueType(0).getVectorElementCount()) && + "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + +SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, + ISD::MemIndexType IndexType) { + assert(Ops.size() == 7 && "Incompatible number of operands"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::VP_SCATTER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(getSyntheticNodeSubclassData<VPScatterSDNode>( + 
dl.getIROrder(), VTs, VT, MMO, IndexType)); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) { + cast<VPScatterSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + auto *N = newSDNode<VPScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs, + VT, MMO, IndexType); + createOperands(N, Ops); + + assert(N->getMask().getValueType().getVectorElementCount() == + N->getValue().getValueType().getVectorElementCount() && + "Vector width mismatch between mask and data"); + assert( + N->getIndex().getValueType().getVectorElementCount().isScalable() == + N->getValue().getValueType().getVectorElementCount().isScalable() && + "Scalable flags of index and data do not match"); + assert(ElementCount::isKnownGE( + N->getIndex().getValueType().getVectorElementCount(), + N->getValue().getValueType().getVectorElementCount()) && + "Vector width mismatch between index and data"); + assert(isa<ConstantSDNode>(N->getScale()) && + cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() && + "Scale should be a constant power of 2"); + + CSEMap.InsertNode(N, IP); + InsertNode(N); + SDValue V(N, 0); + NewSDValueDbgMsg(V, "Creating new node: ", this); + return V; +} + SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue PassThru, EVT MemVT, @@ -7818,7 +8189,7 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { // select true, T, F --> T // select false, T, F --> F if (auto *CondC = dyn_cast<ConstantSDNode>(Cond)) - return CondC->isNullValue() ? F : T; + return CondC->isZero() ? 
F : T; // TODO: This should simplify VSELECT with constant condition using something // like this (but check boolean contents to be complete?): @@ -9296,7 +9667,7 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) { } #ifndef NDEBUG -void SelectionDAG::VerifyDAGDiverence() { +void SelectionDAG::VerifyDAGDivergence() { std::vector<SDNode *> TopoOrder; CreateTopologicalOrder(TopoOrder); for (auto *N : TopoOrder) { @@ -9384,21 +9755,20 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // before SortedPos will contain the topological sort index, and the // Node Id fields for nodes At SortedPos and after will contain the // count of outstanding operands. - for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { - SDNode *N = &*I++; - checkForCycles(N, this); - unsigned Degree = N->getNumOperands(); + for (SDNode &N : llvm::make_early_inc_range(allnodes())) { + checkForCycles(&N, this); + unsigned Degree = N.getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. - N->setNodeId(DAGSize++); - allnodes_iterator Q(N); + N.setNodeId(DAGSize++); + allnodes_iterator Q(&N); if (Q != SortedPos) SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); assert(SortedPos != AllNodes.end() && "Overran node list"); ++SortedPos; } else { // Temporarily use the Node Id as scratch space for the degree count. 
- N->setNodeId(Degree); + N.setNodeId(Degree); } } @@ -9512,12 +9882,9 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, std::string ErrorStr; raw_string_ostream ErrorFormatter(ErrorStr); - ErrorFormatter << "Undefined external symbol "; ErrorFormatter << '"' << Symbol << '"'; - ErrorFormatter.flush(); - - report_fatal_error(ErrorStr); + report_fatal_error(Twine(ErrorFormatter.str())); } //===----------------------------------------------------------------------===// @@ -9526,7 +9893,7 @@ SDValue SelectionDAG::getSymbolFunctionGlobalAddress(SDValue Op, bool llvm::isNullConstant(SDValue V) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isNullValue(); + return Const != nullptr && Const->isZero(); } bool llvm::isNullFPConstant(SDValue V) { @@ -9536,7 +9903,7 @@ bool llvm::isNullFPConstant(SDValue V) { bool llvm::isAllOnesConstant(SDValue V) { ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); - return Const != nullptr && Const->isAllOnesValue(); + return Const != nullptr && Const->isAllOnes(); } bool llvm::isOneConstant(SDValue V) { @@ -9670,7 +10037,7 @@ bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) { // TODO: may want to use peekThroughBitcast() here. ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs, /*AllowTruncation=*/true); - return C && C->isNullValue(); + return C && C->isZero(); } bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { @@ -9684,7 +10051,7 @@ bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { N = peekThroughBitcasts(N); unsigned BitWidth = N.getScalarValueSizeInBits(); ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs); - return C && C->isAllOnesValue() && C->getValueSizeInBits(0) == BitWidth; + return C && C->isAllOnes() && C->getValueSizeInBits(0) == BitWidth; } HandleSDNode::~HandleSDNode() { @@ -9790,8 +10157,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const { /// isOnlyUserOf - Return true if this node is the only use of N. 
bool SDNode::isOnlyUserOf(const SDNode *N) const { bool Seen = false; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - SDNode *User = *I; + for (const SDNode *User : N->uses()) { if (User == this) Seen = true; else @@ -9804,8 +10170,7 @@ bool SDNode::isOnlyUserOf(const SDNode *N) const { /// Return true if the only users of N are contained in Nodes. bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) { bool Seen = false; - for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { - SDNode *User = *I; + for (const SDNode *User : N->uses()) { if (llvm::is_contained(Nodes, User)) Seen = true; else @@ -10212,14 +10577,14 @@ SelectionDAG::GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, "Mixing fixed width and scalable vectors when enveloping a type"); EVT LoVT, HiVT; if (VTNumElts.getKnownMinValue() > EnvNumElts.getKnownMinValue()) { - LoVT = EnvVT; + LoVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts); HiVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts - EnvNumElts); *HiIsEmpty = false; } else { // Flag that hi type has zero storage size, but return split envelop type // (this would be easier if vector types with zero elements were allowed). 
LoVT = EVT::getVectorVT(*getContext(), EltTp, VTNumElts); - HiVT = EnvVT; + HiVT = EVT::getVectorVT(*getContext(), EltTp, EnvNumElts); *HiIsEmpty = true; } return std::make_pair(LoVT, HiVT); @@ -10387,7 +10752,7 @@ SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts, } SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { - APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); + APInt DemandedElts = APInt::getAllOnes(getNumOperands()); return getSplatValue(DemandedElts, UndefElements); } @@ -10439,7 +10804,7 @@ bool BuildVectorSDNode::getRepeatedSequence(const APInt &DemandedElts, bool BuildVectorSDNode::getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence, BitVector *UndefElements) const { - APInt DemandedElts = APInt::getAllOnesValue(getNumOperands()); + APInt DemandedElts = APInt::getAllOnes(getNumOperands()); return getRepeatedSequence(DemandedElts, Sequence, UndefElements); } @@ -10485,6 +10850,97 @@ BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, return -1; } +bool BuildVectorSDNode::getConstantRawBits( + bool IsLittleEndian, unsigned DstEltSizeInBits, + SmallVectorImpl<APInt> &RawBitElements, BitVector &UndefElements) const { + // Early-out if this contains anything but Undef/Constant/ConstantFP. + if (!isConstant()) + return false; + + unsigned NumSrcOps = getNumOperands(); + unsigned SrcEltSizeInBits = getValueType(0).getScalarSizeInBits(); + assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && + "Invalid bitcast scale"); + + // Extract raw src bits. 
+ SmallVector<APInt> SrcBitElements(NumSrcOps, + APInt::getNullValue(SrcEltSizeInBits)); + BitVector SrcUndeElements(NumSrcOps, false); + + for (unsigned I = 0; I != NumSrcOps; ++I) { + SDValue Op = getOperand(I); + if (Op.isUndef()) { + SrcUndeElements.set(I); + continue; + } + auto *CInt = dyn_cast<ConstantSDNode>(Op); + auto *CFP = dyn_cast<ConstantFPSDNode>(Op); + assert((CInt || CFP) && "Unknown constant"); + SrcBitElements[I] = + CInt ? CInt->getAPIntValue().truncOrSelf(SrcEltSizeInBits) + : CFP->getValueAPF().bitcastToAPInt(); + } + + // Recast to dst width. + recastRawBits(IsLittleEndian, DstEltSizeInBits, RawBitElements, + SrcBitElements, UndefElements, SrcUndeElements); + return true; +} + +void BuildVectorSDNode::recastRawBits(bool IsLittleEndian, + unsigned DstEltSizeInBits, + SmallVectorImpl<APInt> &DstBitElements, + ArrayRef<APInt> SrcBitElements, + BitVector &DstUndefElements, + const BitVector &SrcUndefElements) { + unsigned NumSrcOps = SrcBitElements.size(); + unsigned SrcEltSizeInBits = SrcBitElements[0].getBitWidth(); + assert(((NumSrcOps * SrcEltSizeInBits) % DstEltSizeInBits) == 0 && + "Invalid bitcast scale"); + assert(NumSrcOps == SrcUndefElements.size() && + "Vector size mismatch"); + + unsigned NumDstOps = (NumSrcOps * SrcEltSizeInBits) / DstEltSizeInBits; + DstUndefElements.clear(); + DstUndefElements.resize(NumDstOps, false); + DstBitElements.assign(NumDstOps, APInt::getNullValue(DstEltSizeInBits)); + + // Concatenate src elements constant bits together into dst element. + if (SrcEltSizeInBits <= DstEltSizeInBits) { + unsigned Scale = DstEltSizeInBits / SrcEltSizeInBits; + for (unsigned I = 0; I != NumDstOps; ++I) { + DstUndefElements.set(I); + APInt &DstBits = DstBitElements[I]; + for (unsigned J = 0; J != Scale; ++J) { + unsigned Idx = (I * Scale) + (IsLittleEndian ? 
J : (Scale - J - 1)); + if (SrcUndefElements[Idx]) + continue; + DstUndefElements.reset(I); + const APInt &SrcBits = SrcBitElements[Idx]; + assert(SrcBits.getBitWidth() == SrcEltSizeInBits && + "Illegal constant bitwidths"); + DstBits.insertBits(SrcBits, J * SrcEltSizeInBits); + } + } + return; + } + + // Split src element constant bits into dst elements. + unsigned Scale = SrcEltSizeInBits / DstEltSizeInBits; + for (unsigned I = 0; I != NumSrcOps; ++I) { + if (SrcUndefElements[I]) { + DstUndefElements.set(I * Scale, (I + 1) * Scale); + continue; + } + const APInt &SrcBits = SrcBitElements[I]; + for (unsigned J = 0; J != Scale; ++J) { + unsigned Idx = (I * Scale) + (IsLittleEndian ? J : (Scale - J - 1)); + APInt &DstBits = DstBitElements[Idx]; + DstBits = SrcBits.extractBits(DstEltSizeInBits, J * DstEltSizeInBits); + } + } +} + bool BuildVectorSDNode::isConstant() const { for (const SDValue &Op : op_values()) { unsigned Opc = Op.getOpcode(); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index 20c7d771bfb6..6d8252046501 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -14,6 +14,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" #include <cstdint> @@ -143,13 +144,27 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0, bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase()); bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase()); - // If of mismatched base types or checkable indices we can check - // they do not alias. 
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) || - (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) && - (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) { - IsAlias = false; - return true; + if ((IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) { + // We can derive NoAlias In case of mismatched base types. + if (IsFI0 != IsFI1 || IsGV0 != IsGV1 || IsCV0 != IsCV1) { + IsAlias = false; + return true; + } + if (IsGV0 && IsGV1) { + auto *GV0 = cast<GlobalAddressSDNode>(BasePtr0.getBase())->getGlobal(); + auto *GV1 = cast<GlobalAddressSDNode>(BasePtr1.getBase())->getGlobal(); + // It doesn't make sense to access one global value using another globals + // values address, so we can assume that there is no aliasing in case of + // two different globals (unless we have symbols that may indirectly point + // to each other). + // FIXME: This is perhaps a bit too defensive. We could try to follow the + // chain with aliasee information for GlobalAlias variables to find out if + // we indirect symbols may alias or not. + if (GV0 != GV1 && !isa<GlobalAlias>(GV0) && !isa<GlobalAlias>(GV1)) { + IsAlias = false; + return true; + } + } } return false; // Cannot determine whether the pointers alias. } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index d56d4bcc9169..5d911c165293 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -69,6 +69,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InlineAsm.h" @@ -399,29 +400,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, return Val; if (PartEVT.isVector()) { + // Vector/Vector bitcast. 
+ if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. - if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { + if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) { assert((PartEVT.getVectorElementCount().getKnownMinValue() > ValueVT.getVectorElementCount().getKnownMinValue()) && (PartEVT.getVectorElementCount().isScalable() == ValueVT.getVectorElementCount().isScalable()) && "Cannot narrow, it would be a lossy transformation"); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getVectorIdxConstant(0, DL)); + PartEVT = + EVT::getVectorVT(*DAG.getContext(), PartEVT.getVectorElementType(), + ValueVT.getVectorElementCount()); + Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, PartEVT, Val, + DAG.getVectorIdxConstant(0, DL)); + if (PartEVT == ValueVT) + return Val; } - // Vector/Vector bitcast. - if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) - return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - - assert(PartEVT.getVectorElementCount() == ValueVT.getVectorElementCount() && - "Cannot handle this kind of promotion"); // Promoted vector extract return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); - } // Trivial bitcast if the types are the same size and the destination @@ -670,6 +673,17 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Promoted vector extract Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } else if (PartEVT.isVector() && + PartEVT.getVectorElementType() != + ValueVT.getVectorElementType() && + TLI.getTypeAction(*DAG.getContext(), ValueVT) == + TargetLowering::TypeWidenVector) { + // Combination of widening and promotion. 
+ EVT WidenVT = + EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(), + PartVT.getVectorElementCount()); + SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT); + Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT); } else { if (ValueVT.getVectorElementCount().isScalar()) { Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, @@ -726,15 +740,19 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } else if (ValueVT.getSizeInBits() == BuiltVectorTy.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val); - } else if (SDValue Widened = - widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { - Val = Widened; - } else if (BuiltVectorTy.getVectorElementType().bitsGE( - ValueVT.getVectorElementType()) && - BuiltVectorTy.getVectorElementCount() == - ValueVT.getVectorElementCount()) { - // Promoted vector extract - Val = DAG.getAnyExtOrTrunc(Val, DL, BuiltVectorTy); + } else { + if (BuiltVectorTy.getVectorElementType().bitsGT( + ValueVT.getVectorElementType())) { + // Integer promotion. + ValueVT = EVT::getVectorVT(*DAG.getContext(), + BuiltVectorTy.getVectorElementType(), + ValueVT.getVectorElementCount()); + Val = DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); + } + + if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy)) { + Val = Widened; + } } assert(Val.getValueType() == BuiltVectorTy && "Unexpected vector value type"); @@ -1275,21 +1293,23 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { while (isa<Instruction>(V)) { Instruction &VAsInst = *cast<Instruction>(V); // Temporary "0", awaiting real implementation. 
+ SmallVector<uint64_t, 16> Ops; SmallVector<Value *, 4> AdditionalValues; - DIExpression *SalvagedExpr = - salvageDebugInfoImpl(VAsInst, Expr, StackValue, 0, AdditionalValues); - + V = salvageDebugInfoImpl(VAsInst, Expr->getNumLocationOperands(), Ops, + AdditionalValues); // If we cannot salvage any further, and haven't yet found a suitable debug // expression, bail out. + if (!V) + break; + // TODO: If AdditionalValues isn't empty, then the salvage can only be // represented with a DBG_VALUE_LIST, so we give up. When we have support // here for variadic dbg_values, remove that condition. - if (!SalvagedExpr || !AdditionalValues.empty()) + if (!AdditionalValues.empty()) break; // New value and expr now represent this debuginfo. - V = VAsInst.getOperand(0); - Expr = SalvagedExpr; + Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, StackValue); // Some kind of simplification occurred: check whether the operand of the // salvaged debug expression can be encoded in this DAG. @@ -1400,7 +1420,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, BitsToDescribe = *VarSize; if (auto Fragment = Expr->getFragmentInfo()) BitsToDescribe = Fragment->SizeInBits; - for (auto RegAndSize : RFV.getRegsAndSizes()) { + for (const auto &RegAndSize : RFV.getRegsAndSizes()) { // Bail out if all bits are described already. 
if (Offset >= BitsToDescribe) break; @@ -1945,16 +1965,13 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { /*IsVarArg*/ false, DL); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, - Attribute::SExt)) + if (F->getAttributes().hasRetAttr(Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex, - Attribute::ZExt)) + else if (F->getAttributes().hasRetAttr(Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; LLVMContext &Context = F->getContext(); - bool RetInReg = F->getAttributes().hasAttribute( - AttributeList::ReturnIndex, Attribute::InReg); + bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg); for (unsigned j = 0; j != NumValues; ++j) { EVT VT = ValueVTs[j]; @@ -1995,7 +2012,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { - Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + Outs.push_back(ISD::OutputArg(Flags, + Parts[i].getValueType().getSimpleVT(), VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } @@ -2012,10 +2030,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { assert(SwiftError.getFunctionArg() && "Need a swift error argument"); ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); Flags.setSwiftError(); - Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/, - EVT(TLI.getPointerTy(DL)) /*argvt*/, - true /*isfixed*/, 1 /*origidx*/, - 0 /*partOffs*/)); + Outs.push_back(ISD::OutputArg( + Flags, /*vt=*/TLI.getPointerTy(DL), /*argvt=*/EVT(TLI.getPointerTy(DL)), + /*isfixed=*/true, /*origidx=*/1, /*partOffs=*/0)); // Create SDNode for the swifterror virtual register. 
OutVals.push_back( DAG.getRegister(SwiftError.getOrCreateVRegUseAt( @@ -2566,7 +2583,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT, JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; - if (!JTH.OmitRangeCheck) { + if (!JTH.FallthroughUnreachable) { // Emit the range check for the jump table, and branch to the default block // for the switch statement if the value being switched on exceeds the // largest case in the switch. @@ -2663,7 +2680,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, TargetLowering::ArgListEntry Entry; Entry.Node = GuardVal; Entry.Ty = FnTy->getParamType(0); - if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg)) + if (GuardCheckFn->hasParamAttribute(0, Attribute::AttrKind::InReg)) Entry.IsInReg = true; Args.push_back(Entry); @@ -2778,13 +2795,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, MachineBasicBlock* MBB = B.Cases[0].ThisBB; - if (!B.OmitRangeCheck) + if (!B.FallthroughUnreachable) addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); addSuccessorWithProb(SwitchBB, MBB, B.Prob); SwitchBB->normalizeSuccProbs(); SDValue Root = CopyTo; - if (!B.OmitRangeCheck) { + if (!B.FallthroughUnreachable) { // Conditional branch to the default block. SDValue RangeCmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), @@ -3140,7 +3157,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to // optimization early. - else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits())) + else if (ShiftSize >= Log2_32_Ceil(Op1.getValueSizeInBits())) Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); // Otherwise we'll need to temporarily settle for some other convenient // type. Type legalization will make adjustments once the shiftee is split. 
@@ -4057,8 +4074,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Type *Ty = I.getType(); Align Alignment = I.getAlign(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); + AAMDNodes AAInfo = I.getAAMetadata(); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SmallVector<EVT, 4> ValueVTs, MemVTs; @@ -4185,13 +4201,11 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) { const Value *SV = I.getOperand(0); Type *Ty = I.getType(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); assert( (!AA || !AA->pointsToConstantMemory(MemoryLocation( SV, LocationSize::precise(DAG.getDataLayout().getTypeStoreSize(Ty)), - AAInfo))) && + I.getAAMetadata()))) && "load_from_swift_error should not be constant memory"); SmallVector<EVT, 4> ValueVTs; @@ -4249,8 +4263,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); SDLoc dl = getCurSDLoc(); Align Alignment = I.getAlign(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); + AAMDNodes AAInfo = I.getAAMetadata(); auto MMOFlags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); @@ -4321,14 +4334,11 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, if (!Alignment) Alignment = DAG.getEVTAlign(VT); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, // TODO: Make MachineMemOperands aware of scalable // vectors. 
- VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); + VT.getStoreSize().getKnownMinSize(), *Alignment, I.getAAMetadata()); SDValue StoreNode = DAG.getMaskedStore(getMemoryRoot(), sdl, Src0, Ptr, Offset, Mask, VT, MMO, ISD::UNINDEXED, false /* Truncating */, IsCompressing); @@ -4358,7 +4368,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); const DataLayout &DL = DAG.getDataLayout(); - assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type"); // Handle splat constant pointer. if (auto *C = dyn_cast<Constant>(Ptr)) { @@ -4412,9 +4422,6 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { .getValueOr(DAG.getEVTAlign(VT.getScalarType())); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); - SDValue Base; SDValue Index; ISD::MemIndexType IndexType; @@ -4427,7 +4434,7 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { MachinePointerInfo(AS), MachineMemOperand::MOStore, // TODO: Make MachineMemOperands aware of scalable // vectors. - MemoryLocation::UnknownSize, Alignment, AAInfo); + MemoryLocation::UnknownSize, Alignment, I.getAAMetadata()); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); Index = getValue(Ptr); @@ -4485,8 +4492,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { if (!Alignment) Alignment = DAG.getEVTAlign(VT); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); + AAMDNodes AAInfo = I.getAAMetadata(); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); // Do not serialize masked loads of constant memory with anything. 
@@ -4529,8 +4535,6 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { ->getMaybeAlignValue() .getValueOr(DAG.getEVTAlign(VT.getScalarType())); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); SDValue Root = DAG.getRoot(); @@ -4545,7 +4549,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { MachinePointerInfo(AS), MachineMemOperand::MOLoad, // TODO: Make MachineMemOperands aware of scalable // vectors. - MemoryLocation::UnknownSize, Alignment, AAInfo, Ranges); + MemoryLocation::UnknownSize, Alignment, I.getAAMetadata(), Ranges); if (!UniformBase) { Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); @@ -4786,7 +4790,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, TLI.getPointerTy(DAG.getDataLayout()))); // Add all operands of the call to the operand list. - for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + for (unsigned i = 0, e = I.arg_size(); i != e; ++i) { const Value *Arg = I.getArgOperand(i); if (!I.paramHasAttr(i, Attribute::ImmArg)) { Ops.push_back(getValue(Arg)); @@ -4823,12 +4827,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, AAInfo); + Info.align, Info.flags, Info.size, + I.getAAMetadata()); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -5510,12 +5513,12 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // we've been asked to pursue. 
auto MakeVRegDbgValue = [&](Register Reg, DIExpression *FragExpr, bool Indirect) { - if (Reg.isVirtual() && TM.Options.ValueTrackingVariableLocations) { + if (Reg.isVirtual() && MF.useDebugInstrRef()) { // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF // pointing at the VReg, which will be patched up later. auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF); auto MIB = BuildMI(MF, DL, Inst); - MIB.addReg(Reg, RegState::Debug); + MIB.addReg(Reg); MIB.addImm(0); MIB.addMetadata(Variable); auto *NewDIExpr = FragExpr; @@ -5637,7 +5640,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( auto splitMultiRegDbgValue = [&](ArrayRef<std::pair<unsigned, TypeSize>> SplitRegs) { unsigned Offset = 0; - for (auto RegAndSize : SplitRegs) { + for (const auto &RegAndSize : SplitRegs) { // If the expression is already a fragment, the current register // offset+size might extend beyond the fragment. In this case, only // the register bits that are inside the fragment are relevant. @@ -5866,12 +5869,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(Root, sdl, Op1, Op2, Op3, Alignment, isVol, /* AlwaysInline */ false, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), AAInfo); + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata()); updateDAGForMaybeTailCall(MC); return; } @@ -5889,12 +5891,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); // FIXME: Support passing different dest/src alignments to the memcpy DAG // node. 
- AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Alignment, isVol, /* AlwaysInline */ true, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), AAInfo); + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata()); updateDAGForMaybeTailCall(MC); return; } @@ -5908,10 +5909,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, bool isVol = MSI.isVolatile(); bool isTC = I.isTailCall() && isInTailCallPosition(I, DAG.getTarget()); SDValue Root = isVol ? getRoot() : getMemoryRoot(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MS = DAG.getMemset(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, - MachinePointerInfo(I.getArgOperand(0)), AAInfo); + MachinePointerInfo(I.getArgOperand(0)), + I.getAAMetadata()); updateDAGForMaybeTailCall(MS); return; } @@ -5929,11 +5929,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // FIXME: Support passing different dest/src alignments to the memmove DAG // node. SDValue Root = isVol ? 
getRoot() : getMemoryRoot(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MM = DAG.getMemmove(Root, sdl, Op1, Op2, Op3, Alignment, isVol, isTC, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), AAInfo); + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata()); updateDAGForMaybeTailCall(MM); return; } @@ -6124,7 +6123,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, if (Values.empty()) return; - if (std::count(Values.begin(), Values.end(), nullptr)) + if (llvm::is_contained(Values, nullptr)) return; bool IsVariadic = DI.hasArgList(); @@ -6706,9 +6705,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::debugtrap: case Intrinsic::trap: { StringRef TrapFuncName = - I.getAttributes() - .getAttribute(AttributeList::FunctionIndex, "trap-func-name") - .getValueAsString(); + I.getAttributes().getFnAttr("trap-func-name").getValueAsString(); if (TrapFuncName.empty()) { switch (Intrinsic) { case Intrinsic::trap: @@ -6888,7 +6885,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission // is the same on all targets. - for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + for (unsigned Idx = 0, E = I.arg_size(); Idx < E; ++Idx) { Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); if (isa<ConstantPointerNull>(Arg)) continue; // Skip null pointers. They represent a hole in index space. 
@@ -7058,7 +7055,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, }; SmallVector<BranchFunnelTarget, 8> Targets; - for (unsigned Op = 1, N = I.getNumArgOperands(); Op != N; Op += 2) { + for (unsigned Op = 1, N = I.arg_size(); Op != N; Op += 2) { auto *ElemBase = dyn_cast<GlobalObject>(GetPointerBaseWithConstantOffset( I.getArgOperand(Op), Offset, DAG.getDataLayout())); if (ElemBase != Base) @@ -7327,9 +7324,128 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { llvm_unreachable( "Inconsistency: no SDNode available for this VPIntrinsic!"); + if (*ResOPC == ISD::VP_REDUCE_SEQ_FADD || + *ResOPC == ISD::VP_REDUCE_SEQ_FMUL) { + if (VPIntrin.getFastMathFlags().allowReassoc()) + return *ResOPC == ISD::VP_REDUCE_SEQ_FADD ? ISD::VP_REDUCE_FADD + : ISD::VP_REDUCE_FMUL; + } + return ResOPC.getValue(); } +void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues, + bool isGather) { + SDLoc DL = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value *PtrOperand = VPIntrin.getArgOperand(0); + MaybeAlign Alignment = DAG.getEVTAlign(VT); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + SDValue LD; + bool AddToChain = true; + if (!isGather) { + // Do not serialize variable-length loads of constant memory with + // anything. + MemoryLocation ML; + if (VT.isScalableVector()) + ML = MemoryLocation::getAfter(PtrOperand); + else + ML = MemoryLocation( + PtrOperand, + LocationSize::precise( + DAG.getDataLayout().getTypeStoreSize(VPIntrin.getType())), + AAInfo); + AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo, Ranges); + LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], + MMO, false /*IsExpanding */); + } else { + unsigned AS = + PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + SDValue Base, Index, Scale; + ISD::MemIndexType IndexType; + bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, + this, VPIntrin.getParent()); + if (!UniformBase) { + Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(PtrOperand); + IndexType = ISD::SIGNED_UNSCALED; + Scale = + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + } + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); + } + LD = DAG.getGatherVP( + DAG.getVTList(VT, MVT::Other), VT, DL, + {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, + IndexType); + } + if (AddToChain) + PendingLoads.push_back(LD.getValue(1)); + setValue(&VPIntrin, LD); +} + +void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues, + bool isScatter) { + SDLoc DL = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value *PtrOperand = VPIntrin.getArgOperand(1); + EVT VT = OpValues[0].getValueType(); + MaybeAlign Alignment = DAG.getEVTAlign(VT); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + SDValue ST; + if (!isScatter) { + MachineMemOperand 
*MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + VT.getStoreSize().getKnownMinSize(), *Alignment, AAInfo); + ST = + DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], OpValues[1], + OpValues[2], OpValues[3], MMO, false /* IsTruncating */); + } else { + unsigned AS = + PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, *Alignment, AAInfo); + SDValue Base, Index, Scale; + ISD::MemIndexType IndexType; + bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, + this, VPIntrin.getParent()); + if (!UniformBase) { + Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(PtrOperand); + IndexType = ISD::SIGNED_UNSCALED; + Scale = + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + } + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); + } + ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, + {getMemoryRoot(), OpValues[0], Base, Index, Scale, + OpValues[2], OpValues[3]}, + MMO, IndexType); + } + DAG.setRoot(ST); + setValue(&VPIntrin, ST); +} + void SelectionDAGBuilder::visitVectorPredicationIntrinsic( const VPIntrinsic &VPIntrin) { SDLoc DL = getCurSDLoc(); @@ -7349,15 +7465,29 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( // Request operands. 
SmallVector<SDValue, 7> OpValues; - for (unsigned I = 0; I < VPIntrin.getNumArgOperands(); ++I) { + for (unsigned I = 0; I < VPIntrin.arg_size(); ++I) { auto Op = getValue(VPIntrin.getArgOperand(I)); if (I == EVLParamPos) Op = DAG.getNode(ISD::ZERO_EXTEND, DL, EVLParamVT, Op); OpValues.push_back(Op); } - SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); - setValue(&VPIntrin, Result); + switch (Opcode) { + default: { + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + setValue(&VPIntrin, Result); + break; + } + case ISD::VP_LOAD: + case ISD::VP_GATHER: + visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues, + Opcode == ISD::VP_GATHER); + break; + case ISD::VP_STORE: + case ISD::VP_SCATTER: + visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER); + break; + } } SDValue SelectionDAGBuilder::lowerStartEH(SDValue Chain, @@ -7760,12 +7890,11 @@ bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) { // because the return pointer needs to be adjusted by the size of // the copied memory. SDValue Root = isVol ? getRoot() : getMemoryRoot(); - AAMDNodes AAInfo; - I.getAAMetadata(AAInfo); SDValue MC = DAG.getMemcpy(Root, sdl, Dst, Src, Size, Alignment, isVol, false, /*isTailCall=*/false, MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)), AAInfo); + MachinePointerInfo(I.getArgOperand(1)), + I.getAAMetadata()); assert(MC.getNode() != nullptr && "** memcpy should not be lowered as TailCall in mempcpy context **"); DAG.setRoot(MC); @@ -7918,6 +8047,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { } if (Function *F = I.getCalledFunction()) { + diagnoseDontCall(I); + if (F->isDeclaration()) { // Is this an LLVM intrinsic or a target-specific intrinsic? 
unsigned IID = F->getIntrinsicID(); @@ -8176,7 +8307,7 @@ public: } } - return TLI.getValueType(DL, OpTy, true); + return TLI.getAsmOperandValueType(DL, OpTy, true); } }; @@ -8261,9 +8392,10 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, /// /// OpInfo describes the operand /// RefOpInfo describes the matching operand if any, the operand otherwise -static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, - SDISelAsmOperandInfo &OpInfo, - SDISelAsmOperandInfo &RefOpInfo) { +static llvm::Optional<unsigned> +getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, + SDISelAsmOperandInfo &OpInfo, + SDISelAsmOperandInfo &RefOpInfo) { LLVMContext &Context = *DAG.getContext(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -8273,7 +8405,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // No work to do for memory operations. if (OpInfo.ConstraintType == TargetLowering::C_Memory) - return; + return None; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. @@ -8283,7 +8415,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); // RC is unset only on failure. Return immediately. if (!RC) - return; + return None; // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to @@ -8328,7 +8460,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // No need to allocate a matching input constraint since the constraint it's // matching to has already been allocated. if (OpInfo.isMatchingInputConstraint()) - return; + return None; EVT ValueVT = OpInfo.ConstraintVT; if (OpInfo.ConstraintVT == MVT::Other) @@ -8351,8 +8483,12 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // Do not check for single registers. 
if (AssignedReg) { - for (; *I != AssignedReg; ++I) - assert(I != RC->end() && "AssignedReg should be member of RC"); + I = std::find(I, RC->end(), AssignedReg); + if (I == RC->end()) { + // RC does not contain the selected register, which indicates a + // mismatch between the register and the required type/bitwidth. + return {AssignedReg}; + } } for (; NumRegs; --NumRegs, ++I) { @@ -8362,6 +8498,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); + return None; } static unsigned @@ -8452,12 +8589,12 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // Process the call argument. BasicBlocks are labels, currently appearing // only in asm's. if (isa<CallBrInst>(Call) && - ArgNo - 1 >= (cast<CallBrInst>(&Call)->getNumArgOperands() - + ArgNo - 1 >= (cast<CallBrInst>(&Call)->arg_size() - cast<CallBrInst>(&Call)->getNumIndirectDests() - NumMatchingOps) && (NumMatchingOps == 0 || - ArgNo - 1 < (cast<CallBrInst>(&Call)->getNumArgOperands() - - NumMatchingOps))) { + ArgNo - 1 < + (cast<CallBrInst>(&Call)->arg_size() - NumMatchingOps))) { const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal); EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true); OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT); @@ -8479,8 +8616,8 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, DAG.getDataLayout(), STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = - TLI.getSimpleValueType(DAG.getDataLayout(), Call.getType()); + OpInfo.ConstraintVT = TLI.getAsmOperandValueType( + DAG.getDataLayout(), Call.getType()).getSimpleVT(); } ++ResNo; } else { @@ -8595,7 +8732,18 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, OpInfo.isMatchingInputConstraint() ? 
ConstraintOperands[OpInfo.getMatchedOperand()] : OpInfo; - GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); + const auto RegError = + getRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo); + if (RegError.hasValue()) { + const MachineFunction &MF = DAG.getMachineFunction(); + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const char *RegName = TRI.getName(RegError.getValue()); + emitInlineAsmError(Call, "register '" + Twine(RegName) + + "' allocated for constraint '" + + Twine(OpInfo.ConstraintCode) + + "' does not match required type"); + return; + } auto DetectWriteToReservedRegister = [&]() { const MachineFunction &MF = DAG.getMachineFunction(); @@ -8674,11 +8822,13 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, MachineFunction &MF = DAG.getMachineFunction(); MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - RegisterSDNode *R = dyn_cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]); + auto *R = cast<RegisterSDNode>(AsmNodeOperands[CurOp+1]); Register TiedReg = R->getReg(); MVT RegVT = R->getSimpleValueType(0); - const TargetRegisterClass *RC = TiedReg.isVirtual() ? - MRI.getRegClass(TiedReg) : TRI.getMinimalPhysRegClass(TiedReg); + const TargetRegisterClass *RC = + TiedReg.isVirtual() ? MRI.getRegClass(TiedReg) + : RegVT != MVT::Untyped ? 
TLI.getRegClassFor(RegVT) + : TRI.getMinimalPhysRegClass(TiedReg); unsigned NumRegs = InlineAsm::getNumOperandRegisters(OpFlag); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(MRI.createVirtualRegister(RC)); @@ -9317,7 +9467,7 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I, const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Op1 = getValue(I.getArgOperand(0)); SDValue Op2; - if (I.getNumArgOperands() > 1) + if (I.arg_size() > 1) Op2 = getValue(I.getArgOperand(1)); SDLoc dl = getCurSDLoc(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -9671,9 +9821,10 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // if it isn't first piece, alignment must be 1 // For scalable vectors the scalable part is currently handled // by individual targets, so we just use the known minimum size here. - ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, - i < CLI.NumFixedArgs, i, - j*Parts[j].getValueType().getStoreSize().getKnownMinSize()); + ISD::OutputArg MyFlags( + Flags, Parts[j].getValueType().getSimpleVT(), VT, + i < CLI.NumFixedArgs, i, + j * Parts[j].getValueType().getStoreSize().getKnownMinSize()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { @@ -9841,10 +9992,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { None); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); - ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == - FuncInfo.PreferredExtendType.end()) - ? 
ISD::ANY_EXTEND - : FuncInfo.PreferredExtendType[V]; + ISD::NodeType ExtendType = ISD::ANY_EXTEND; + auto PreferredExtendIt = FuncInfo.PreferredExtendType.find(V); + if (PreferredExtendIt != FuncInfo.PreferredExtendType.end()) + ExtendType = PreferredExtendIt->second; RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); PendingExports.push_back(Chain); } @@ -10490,27 +10641,6 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { ConstantsOut.clear(); } -/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB -/// is 0. -MachineBasicBlock * -SelectionDAGBuilder::StackProtectorDescriptor:: -AddSuccessorMBB(const BasicBlock *BB, - MachineBasicBlock *ParentMBB, - bool IsLikely, - MachineBasicBlock *SuccMBB) { - // If SuccBB has not been created yet, create it. - if (!SuccMBB) { - MachineFunction *MF = ParentMBB->getParent(); - MachineFunction::iterator BBI(ParentMBB); - SuccMBB = MF->CreateMachineBasicBlock(BB); - MF->insert(++BBI, SuccMBB); - } - // Add it as a successor of ParentMBB. - ParentMBB->addSuccessor( - SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); - return SuccMBB; -} - MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { MachineFunction::iterator I(MBB); if (++I == FuncInfo.MF->end()) @@ -10675,12 +10805,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, } } - if (FallthroughUnreachable) { - // Skip the range check if the fallthrough block is unreachable. 
- JTH->OmitRangeCheck = true; - } + if (FallthroughUnreachable) + JTH->FallthroughUnreachable = true; - if (!JTH->OmitRangeCheck) + if (!JTH->FallthroughUnreachable) addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); CurMBB->normalizeSuccProbs(); @@ -10718,10 +10846,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, BTB->DefaultProb -= DefaultProb / 2; } - if (FallthroughUnreachable) { - // Skip the range check if the fallthrough block is unreachable. - BTB->OmitRangeCheck = true; - } + if (FallthroughUnreachable) + BTB->FallthroughUnreachable = true; // If we're in the right place, emit the bit test header right now. if (CurMBB == SwitchMBB) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index df5be156821f..d6122aa0a739 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/SwitchLoweringUtils.h" @@ -180,204 +181,6 @@ private: SwitchCG::CaseClusterVector &Clusters, BranchProbability &PeeledCaseProb); - /// A class which encapsulates all of the information needed to generate a - /// stack protector check and signals to isel via its state being initialized - /// that a stack protector needs to be generated. - /// - /// *NOTE* The following is a high level documentation of SelectionDAG Stack - /// Protector Generation. The reason that it is placed here is for a lack of - /// other good places to stick it. 
- /// - /// High Level Overview of SelectionDAG Stack Protector Generation: - /// - /// Previously, generation of stack protectors was done exclusively in the - /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated - /// splitting basic blocks at the IR level to create the success/failure basic - /// blocks in the tail of the basic block in question. As a result of this, - /// calls that would have qualified for the sibling call optimization were no - /// longer eligible for optimization since said calls were no longer right in - /// the "tail position" (i.e. the immediate predecessor of a ReturnInst - /// instruction). - /// - /// Then it was noticed that since the sibling call optimization causes the - /// callee to reuse the caller's stack, if we could delay the generation of - /// the stack protector check until later in CodeGen after the sibling call - /// decision was made, we get both the tail call optimization and the stack - /// protector check! - /// - /// A few goals in solving this problem were: - /// - /// 1. Preserve the architecture independence of stack protector generation. - /// - /// 2. Preserve the normal IR level stack protector check for platforms like - /// OpenBSD for which we support platform-specific stack protector - /// generation. - /// - /// The main problem that guided the present solution is that one can not - /// solve this problem in an architecture independent manner at the IR level - /// only. This is because: - /// - /// 1. The decision on whether or not to perform a sibling call on certain - /// platforms (for instance i386) requires lower level information - /// related to available registers that can not be known at the IR level. - /// - /// 2. Even if the previous point were not true, the decision on whether to - /// perform a tail call is done in LowerCallTo in SelectionDAG which - /// occurs after the Stack Protector Pass. 
As a result, one would need to - /// put the relevant callinst into the stack protector check success - /// basic block (where the return inst is placed) and then move it back - /// later at SelectionDAG/MI time before the stack protector check if the - /// tail call optimization failed. The MI level option was nixed - /// immediately since it would require platform-specific pattern - /// matching. The SelectionDAG level option was nixed because - /// SelectionDAG only processes one IR level basic block at a time - /// implying one could not create a DAG Combine to move the callinst. - /// - /// To get around this problem a few things were realized: - /// - /// 1. While one can not handle multiple IR level basic blocks at the - /// SelectionDAG Level, one can generate multiple machine basic blocks - /// for one IR level basic block. This is how we handle bit tests and - /// switches. - /// - /// 2. At the MI level, tail calls are represented via a special return - /// MIInst called "tcreturn". Thus if we know the basic block in which we - /// wish to insert the stack protector check, we get the correct behavior - /// by always inserting the stack protector check right before the return - /// statement. This is a "magical transformation" since no matter where - /// the stack protector check intrinsic is, we always insert the stack - /// protector check code at the end of the BB. - /// - /// Given the aforementioned constraints, the following solution was devised: - /// - /// 1. On platforms that do not support SelectionDAG stack protector check - /// generation, allow for the normal IR level stack protector check - /// generation to continue. - /// - /// 2. On platforms that do support SelectionDAG stack protector check - /// generation: - /// - /// a. Use the IR level stack protector pass to decide if a stack - /// protector is required/which BB we insert the stack protector check - /// in by reusing the logic already therein. 
If we wish to generate a - /// stack protector check in a basic block, we place a special IR - /// intrinsic called llvm.stackprotectorcheck right before the BB's - /// returninst or if there is a callinst that could potentially be - /// sibling call optimized, before the call inst. - /// - /// b. Then when a BB with said intrinsic is processed, we codegen the BB - /// normally via SelectBasicBlock. In said process, when we visit the - /// stack protector check, we do not actually emit anything into the - /// BB. Instead, we just initialize the stack protector descriptor - /// class (which involves stashing information/creating the success - /// mbbb and the failure mbb if we have not created one for this - /// function yet) and export the guard variable that we are going to - /// compare. - /// - /// c. After we finish selecting the basic block, in FinishBasicBlock if - /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is - /// initialized, we produce the validation code with one of these - /// techniques: - /// 1) with a call to a guard check function - /// 2) with inlined instrumentation - /// - /// 1) We insert a call to the check function before the terminator. - /// - /// 2) We first find a splice point in the parent basic block - /// before the terminator and then splice the terminator of said basic - /// block into the success basic block. Then we code-gen a new tail for - /// the parent basic block consisting of the two loads, the comparison, - /// and finally two branches to the success/failure basic blocks. We - /// conclude by code-gening the failure basic block if we have not - /// code-gened it already (all stack protector checks we generate in - /// the same function, use the same failure basic block). 
- class StackProtectorDescriptor { - public: - StackProtectorDescriptor() = default; - - /// Returns true if all fields of the stack protector descriptor are - /// initialized implying that we should/are ready to emit a stack protector. - bool shouldEmitStackProtector() const { - return ParentMBB && SuccessMBB && FailureMBB; - } - - bool shouldEmitFunctionBasedCheckStackProtector() const { - return ParentMBB && !SuccessMBB && !FailureMBB; - } - - /// Initialize the stack protector descriptor structure for a new basic - /// block. - void initialize(const BasicBlock *BB, MachineBasicBlock *MBB, - bool FunctionBasedInstrumentation) { - // Make sure we are not initialized yet. - assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " - "already initialized!"); - ParentMBB = MBB; - if (!FunctionBasedInstrumentation) { - SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true); - FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB); - } - } - - /// Reset state that changes when we handle different basic blocks. - /// - /// This currently includes: - /// - /// 1. The specific basic block we are generating a - /// stack protector for (ParentMBB). - /// - /// 2. The successor machine basic block that will contain the tail of - /// parent mbb after we create the stack protector check (SuccessMBB). This - /// BB is visited only on stack protector check success. - void resetPerBBState() { - ParentMBB = nullptr; - SuccessMBB = nullptr; - } - - /// Reset state that only changes when we switch functions. - /// - /// This currently includes: - /// - /// 1. FailureMBB since we reuse the failure code path for all stack - /// protector checks created in an individual function. - /// - /// 2.The guard variable since the guard variable we are checking against is - /// always the same. 
- void resetPerFunctionState() { - FailureMBB = nullptr; - } - - MachineBasicBlock *getParentMBB() { return ParentMBB; } - MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } - MachineBasicBlock *getFailureMBB() { return FailureMBB; } - - private: - /// The basic block for which we are generating the stack protector. - /// - /// As a result of stack protector generation, we will splice the - /// terminators of this basic block into the successor mbb SuccessMBB and - /// replace it with a compare/branch to the successor mbbs - /// SuccessMBB/FailureMBB depending on whether or not the stack protector - /// was violated. - MachineBasicBlock *ParentMBB = nullptr; - - /// A basic block visited on stack protector check success that contains the - /// terminators of ParentMBB. - MachineBasicBlock *SuccessMBB = nullptr; - - /// This basic block visited on stack protector check failure that will - /// contain a call to __stack_chk_fail(). - MachineBasicBlock *FailureMBB = nullptr; - - /// Add a successor machine basic block to ParentMBB. If the successor mbb - /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic - /// block will be created. Assign a large weight if IsLikely is true. 
- MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, - MachineBasicBlock *ParentMBB, - bool IsLikely, - MachineBasicBlock *SuccMBB = nullptr); - }; - private: const TargetMachine &TM; @@ -764,6 +567,10 @@ private: void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); + void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues, bool isGather); + void visitVPStoreScatter(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues, bool isScatter); void visitVectorPredicationIntrinsic(const VPIntrinsic &VPIntrin); void visitVAStart(const CallInst &I); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 40083c614a6c..77e9e53668f9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -146,9 +146,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); if (IID < Intrinsic::num_intrinsics) return Intrinsic::getBaseName((Intrinsic::ID)IID).str(); - else if (!G) + if (!G) return "Unknown intrinsic"; - else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) return TII->getName(IID); llvm_unreachable("Invalid intrinsic ID"); } @@ -526,13 +526,13 @@ static void printMemOperand(raw_ostream &OS, const MachineMemOperand &MMO, if (G) { const MachineFunction *MF = &G->getMachineFunction(); return printMemOperand(OS, MMO, MF, MF->getFunction().getParent(), - &MF->getFrameInfo(), G->getSubtarget().getInstrInfo(), - *G->getContext()); - } else { - LLVMContext Ctx; - return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, - /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); 
+ &MF->getFrameInfo(), + G->getSubtarget().getInstrInfo(), *G->getContext()); } + + LLVMContext Ctx; + return printMemOperand(OS, MMO, /*MF=*/nullptr, /*M=*/nullptr, + /*MFI=*/nullptr, /*TII=*/nullptr, Ctx); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -948,17 +948,19 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G, if (!Value.getNode()) { OS << "<null>"; return false; - } else if (shouldPrintInline(*Value.getNode(), G)) { + } + + if (shouldPrintInline(*Value.getNode(), G)) { OS << Value->getOperationName(G) << ':'; Value->print_types(OS, G); Value->print_details(OS, G); return true; - } else { - OS << PrintNodeId(*Value.getNode()); - if (unsigned RN = Value.getResNo()) - OS << ':' << RN; - return false; } + + OS << PrintNodeId(*Value.getNode()); + if (unsigned RN = Value.getResNo()) + OS << ':' << RN; + return false; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1012,15 +1014,12 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, N->print(OS, G); - if (depth < 1) - return; - for (const SDValue &Op : N->op_values()) { // Don't follow chain operands. 
if (Op.getValueType() == MVT::Other) continue; OS << '\n'; - printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2); + printrWithDepthHelper(OS, Op.getNode(), G, depth - 1, indent + 2); } } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 1415cce3b1df..c7e37cf8ca14 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -33,6 +33,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -575,7 +576,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. - bool InstrRef = TM.Options.ValueTrackingVariableLocations; + bool InstrRef = MF->useDebugInstrRef(); for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1]; assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST && @@ -699,7 +700,7 @@ static void reportFastISelFailure(MachineFunction &MF, R << (" (in function: " + MF.getName() + ")").str(); if (ShouldAbort) - report_fatal_error(R.getMsg()); + report_fatal_error(Twine(R.getMsg())); ORE.emit(R); } @@ -798,7 +799,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif if (ViewDAGCombine1 && MatchFilterBB) @@ -818,7 +819,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif // Second step, hack on the DAG until it only uses operations and types that @@ -840,7 +841,7 @@ void 
SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif // Only allow creation of legal node types. @@ -864,7 +865,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif } @@ -882,7 +883,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif { @@ -898,7 +899,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif if (ViewDAGCombineLT && MatchFilterBB) @@ -918,7 +919,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif } @@ -938,7 +939,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif if (ViewDAGCombine2 && MatchFilterBB) @@ -958,7 +959,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { #ifndef NDEBUG if (TTI.hasBranchDivergence()) - CurDAG->VerifyDAGDiverence(); + CurDAG->VerifyDAGDivergence(); #endif if (OptLevel != CodeGenOpt::None) @@ -1045,25 +1046,25 @@ public: } // end anonymous namespace // This function is used to enforce the topological node id property -// property leveraged during Instruction selection. Before selection all -// nodes are given a non-negative id such that all nodes have a larger id than +// leveraged during instruction selection. Before the selection process all +// nodes are given a non-negative id such that all nodes have a greater id than // their operands. 
As this holds transitively we can prune checks that a node N // is a predecessor of M another by not recursively checking through M's -// operands if N's ID is larger than M's ID. This is significantly improves -// performance of for various legality checks (e.g. IsLegalToFold / -// UpdateChains). +// operands if N's ID is larger than M's ID. This significantly improves +// performance of various legality checks (e.g. IsLegalToFold / UpdateChains). -// However, when we fuse multiple nodes into a single node -// during selection we may induce a predecessor relationship between inputs and -// outputs of distinct nodes being merged violating the topological property. -// Should a fused node have a successor which has yet to be selected, our -// legality checks would be incorrect. To avoid this we mark all unselected -// sucessor nodes, i.e. id != -1 as invalid for pruning by bit-negating (x => +// However, when we fuse multiple nodes into a single node during the +// selection we may induce a predecessor relationship between inputs and +// outputs of distinct nodes being merged, violating the topological property. +// Should a fused node have a successor which has yet to be selected, +// our legality checks would be incorrect. To avoid this we mark all unselected +// successor nodes, i.e. id != -1, as invalid for pruning by bit-negating (x => // (-(x+1))) the ids and modify our pruning check to ignore negative Ids of M. // We use bit-negation to more clearly enforce that node id -1 can only be -// achieved by selected nodes). As the conversion is reversable the original Id, -// topological pruning can still be leveraged when looking for unselected nodes. -// This method is call internally in all ISel replacement calls. +// achieved by selected nodes. As the conversion is reversible to the original +// Id, topological pruning can still be leveraged when looking for unselected +// nodes. This method is called internally in all ISel replacement related +// functions. 
void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) { SmallVector<SDNode *, 4> Nodes; Nodes.push_back(Node); @@ -1080,7 +1081,7 @@ void SelectionDAGISel::EnforceNodeIdInvariant(SDNode *Node) { } } -// InvalidateNodeId - As discusses in EnforceNodeIdInvariant, mark a +// InvalidateNodeId - As explained in EnforceNodeIdInvariant, mark a // NodeId with the equivalent node id which is invalid for topological // pruning. void SelectionDAGISel::InvalidateNodeId(SDNode *N) { @@ -1226,7 +1227,10 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB, bool IsSingleCatchAllClause = CPI->getNumArgOperands() == 1 && cast<Constant>(CPI->getArgOperand(0))->isNullValue(); - if (!IsSingleCatchAllClause) { + // cathchpads for longjmp use an empty type list, e.g. catchpad within %0 [] + // and they don't need LSDA info + bool IsCatchLongjmp = CPI->getNumArgOperands() == 0; + if (!IsSingleCatchAllClause && !IsCatchLongjmp) { // Create a mapping from landing pad label to landing pad index. bool IntrFound = false; for (const User *U : CPI->users()) { @@ -1644,114 +1648,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { SDB->SPDescriptor.resetPerFunctionState(); } -/// Given that the input MI is before a partial terminator sequence TSeq, return -/// true if M + TSeq also a partial terminator sequence. -/// -/// A Terminator sequence is a sequence of MachineInstrs which at this point in -/// lowering copy vregs into physical registers, which are then passed into -/// terminator instructors so we can satisfy ABI constraints. A partial -/// terminator sequence is an improper subset of a terminator sequence (i.e. it -/// may be the whole terminator sequence). -static bool MIIsInTerminatorSequence(const MachineInstr &MI) { - // If we do not have a copy or an implicit def, we return true if and only if - // MI is a debug value. 
- if (!MI.isCopy() && !MI.isImplicitDef()) - // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the - // physical registers if there is debug info associated with the terminator - // of our mbb. We want to include said debug info in our terminator - // sequence, so we return true in that case. - return MI.isDebugValue(); - - // We have left the terminator sequence if we are not doing one of the - // following: - // - // 1. Copying a vreg into a physical register. - // 2. Copying a vreg into a vreg. - // 3. Defining a register via an implicit def. - - // OPI should always be a register definition... - MachineInstr::const_mop_iterator OPI = MI.operands_begin(); - if (!OPI->isReg() || !OPI->isDef()) - return false; - - // Defining any register via an implicit def is always ok. - if (MI.isImplicitDef()) - return true; - - // Grab the copy source... - MachineInstr::const_mop_iterator OPI2 = OPI; - ++OPI2; - assert(OPI2 != MI.operands_end() - && "Should have a copy implying we should have 2 arguments."); - - // Make sure that the copy dest is not a vreg when the copy source is a - // physical register. - if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) && - Register::isPhysicalRegister(OPI2->getReg()))) - return false; - - return true; -} - -/// Find the split point at which to splice the end of BB into its success stack -/// protector check machine basic block. -/// -/// On many platforms, due to ABI constraints, terminators, even before register -/// allocation, use physical registers. This creates an issue for us since -/// physical registers at this point can not travel across basic -/// blocks. Luckily, selectiondag always moves physical registers into vregs -/// when they enter functions and moves them through a sequence of copies back -/// into the physical registers right before the terminator creating a -/// ``Terminator Sequence''. 
This function is searching for the beginning of the -/// terminator sequence so that we can ensure that we splice off not just the -/// terminator, but additionally the copies that move the vregs into the -/// physical registers. -static MachineBasicBlock::iterator -FindSplitPointForStackProtector(MachineBasicBlock *BB, - const TargetInstrInfo &TII) { - MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); - if (SplitPoint == BB->begin()) - return SplitPoint; - - MachineBasicBlock::iterator Start = BB->begin(); - MachineBasicBlock::iterator Previous = SplitPoint; - --Previous; - - if (TII.isTailCall(*SplitPoint) && - Previous->getOpcode() == TII.getCallFrameDestroyOpcode()) { - // call itself, then we must insert before the sequence even starts. For - // example: - // <split point> - // ADJCALLSTACKDOWN ... - // <Moves> - // ADJCALLSTACKUP ... - // TAILJMP somewhere - // On the other hand, it could be an unrelated call in which case this tail call - // has to register moves of its own and should be the split point. For example: - // ADJCALLSTACKDOWN - // CALL something_else - // ADJCALLSTACKUP - // <split point> - // TAILJMP somewhere - do { - --Previous; - if (Previous->isCall()) - return SplitPoint; - } while(Previous->getOpcode() != TII.getCallFrameSetupOpcode()); - - return Previous; - } - - while (MIIsInTerminatorSequence(*Previous)) { - SplitPoint = Previous; - if (Previous == Start) - break; - --Previous; - } - - return SplitPoint; -} - void SelectionDAGISel::FinishBasicBlock() { LLVM_DEBUG(dbgs() << "Total amount of phi nodes to update: " @@ -1781,7 +1677,7 @@ SelectionDAGISel::FinishBasicBlock() { // Add load and check to the basicblock. 
FuncInfo->MBB = ParentMBB; FuncInfo->InsertPt = - FindSplitPointForStackProtector(ParentMBB, *TII); + findSplitPointForStackProtector(ParentMBB, *TII); SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); @@ -1800,7 +1696,7 @@ SelectionDAGISel::FinishBasicBlock() { // register allocation issues caused by us splitting the parent mbb. The // register allocator will clean up said virtual copies later on. MachineBasicBlock::iterator SplitPoint = - FindSplitPointForStackProtector(ParentMBB, *TII); + findSplitPointForStackProtector(ParentMBB, *TII); // Splice the terminator of ParentMBB into SuccessMBB. SuccessMBB->splice(SuccessMBB->end(), ParentMBB, @@ -1861,9 +1757,9 @@ SelectionDAGISel::FinishBasicBlock() { // test, and delete the last bit test. MachineBasicBlock *NextMBB; - if (BTB.ContiguousRange && j + 2 == ej) { - // Second-to-last bit-test with contiguous range: fall through to the - // target of the final bit test. + if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) { + // Second-to-last bit-test with contiguous range or omitted range + // check: fall through to the target of the final bit test. NextMBB = BTB.Cases[j + 1].TargetBB; } else if (j + 1 == ej) { // For the last bit test, fall through to Default. @@ -1880,7 +1776,7 @@ SelectionDAGISel::FinishBasicBlock() { SDB->clear(); CodeGenAndEmitDAG(); - if (BTB.ContiguousRange && j + 2 == ej) { + if ((BTB.ContiguousRange || BTB.FallthroughUnreachable) && j + 2 == ej) { // Since we're not going to use the final bit test, remove it. 
BTB.Cases.pop_back(); break; @@ -3800,7 +3696,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { else Msg << "unknown intrinsic #" << iid; } - report_fatal_error(Msg.str()); + report_fatal_error(Twine(Msg.str())); } char SelectionDAGISel::ID = 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index a903c2401264..e2db9633bfb9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -1119,7 +1119,7 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl( StatepointLoweringInfo SI(DAG); unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin(); populateCallLoweringInfo( - SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee, + SI.CLI, Call, ArgBeginIndex, Call->arg_size(), Callee, ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(), false); if (!VarArgDisallowed) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 1c1dae8f953f..e4a69adff05b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -26,6 +26,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" +#include "llvm/Support/DivisionByConstantInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" @@ -537,7 +538,7 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, TargetLoweringOpt &TLO) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO); } @@ -621,7 +622,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, } APInt DemandedElts = VT.isVector() - ? 
APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth, AssumeSingleUse); @@ -667,12 +668,12 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumDstEltBits / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { unsigned Offset = i * NumSrcEltBits; APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); - if (!Sub.isNullValue()) { + if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) if (DemandedElts[j]) @@ -690,8 +691,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumSrcEltBits / NumDstEltBits; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % Scale) * NumDstEltBits; @@ -819,13 +820,21 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::INSERT_SUBVECTOR: { - // If we don't demand the inserted subvector, return the base vector. 
SDValue Vec = Op.getOperand(0); SDValue Sub = Op.getOperand(1); uint64_t Idx = Op.getConstantOperandVal(2); unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); - if (DemandedElts.extractBits(NumSubElts, Idx) == 0) + APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); + // If we don't demand the inserted subvector, return the base vector. + if (DemandedSubElts == 0) return Vec; + // If this simply widens the lowest subvector, see if we can do it earlier. + if (Idx == 0 && Vec.isUndef()) { + if (SDValue NewSub = SimplifyMultipleUseDemandedBits( + Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), NewSub, Op.getOperand(2)); + } break; } case ISD::VECTOR_SHUFFLE: { @@ -866,7 +875,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( unsigned Depth) const { EVT VT = Op.getValueType(); APInt DemandedElts = VT.isVector() - ? APInt::getAllOnesValue(VT.getVectorNumElements()) + ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, Depth); @@ -875,7 +884,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts( SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const { - APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits()); + APInt DemandedBits = APInt::getAllOnes(Op.getScalarValueSizeInBits()); return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, Depth); } @@ -942,8 +951,8 @@ bool TargetLowering::SimplifyDemandedBits( } // If this is the root being simplified, allow it to have multiple uses, // just set the DemandedBits/Elts to all bits. 
- DemandedBits = APInt::getAllOnesValue(BitWidth); - DemandedElts = APInt::getAllOnesValue(NumElts); + DemandedBits = APInt::getAllOnes(BitWidth); + DemandedElts = APInt::getAllOnes(NumElts); } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -1038,7 +1047,7 @@ bool TargetLowering::SimplifyDemandedBits( unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); KnownBits KnownSub, KnownSrc; if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO, @@ -1056,8 +1065,8 @@ bool TargetLowering::SimplifyDemandedBits( Known = KnownBits::commonBits(Known, KnownSrc); // Attempt to avoid multi-use src if we don't need anything from it. - if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() || - !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedSubElts.isAllOnes() || + !DemandedSrcElts.isAllOnes()) { SDValue NewSub = SimplifyMultipleUseDemandedBits( Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1); SDValue NewSrc = SimplifyMultipleUseDemandedBits( @@ -1086,7 +1095,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Attempt to avoid multi-use src if we don't need anything from it. 
- if (!DemandedBits.isAllOnesValue() || !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) { SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1); if (DemandedSrc) { @@ -1216,7 +1225,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1263,7 +1272,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1306,7 +1315,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known2.hasConflict() && "Bits known to be one AND zero?"); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!DemandedBits.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -1351,8 +1360,7 @@ bool TargetLowering::SimplifyDemandedBits( // If the RHS is a constant, see if we can change it. 
Don't alter a -1 // constant because that's a 'not' op, and that is better for combining // and codegen. - if (!C->isAllOnesValue() && - DemandedBits.isSubsetOf(C->getAPIntValue())) { + if (!C->isAllOnes() && DemandedBits.isSubsetOf(C->getAPIntValue())) { // We're flipping all demanded bits. Flip the undemanded bits too. SDValue New = TLO.DAG.getNOT(dl, Op0, VT); return TLO.CombineTo(Op, New); @@ -1360,7 +1368,7 @@ bool TargetLowering::SimplifyDemandedBits( } // If we can't turn this into a 'not', try to shrink the constant. - if (!C || !C->isAllOnesValue()) + if (!C || !C->isAllOnes()) if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO)) return true; @@ -1605,7 +1613,7 @@ bool TargetLowering::SimplifyDemandedBits( // always convert this into a logical shr, even if the shift amount is // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. - if (DemandedBits.isOneValue()) + if (DemandedBits.isOne()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1)); if (const APInt *SA = @@ -1655,7 +1663,7 @@ bool TargetLowering::SimplifyDemandedBits( Known.One.setHighBits(ShAmt); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!InDemandedMask.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); if (DemandedOp0) { @@ -1781,7 +1789,7 @@ bool TargetLowering::SimplifyDemandedBits( // If only 1 bit is demanded, replace with PARITY as long as we're before // op legalization. // FIXME: Limit to scalars for now. 
- if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector()) + if (DemandedBits.isOne() && !TLO.LegalOps && !VT.isVector()) return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT, Op.getOperand(0))); @@ -1795,9 +1803,9 @@ bool TargetLowering::SimplifyDemandedBits( // If we only care about the highest bit, don't bother shifting right. if (DemandedBits.isSignMask()) { - unsigned NumSignBits = - TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); - bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1; + unsigned MinSignedBits = + TLO.DAG.ComputeMinSignedBits(Op0, DemandedElts, Depth + 1); + bool AlreadySignExtended = ExVTBits >= MinSignedBits; // However if the input is already sign extended we expect the sign // extension to be dropped altogether later and do not simplify. if (!AlreadySignExtended) { @@ -2071,7 +2079,7 @@ bool TargetLowering::SimplifyDemandedBits( // Demand the bits from every vector element without a constant index. unsigned NumSrcElts = SrcEltCnt.getFixedValue(); - APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts); + APInt DemandedSrcElts = APInt::getAllOnes(NumSrcElts); if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) if (CIdx->getAPIntValue().ult(NumSrcElts)) DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue()); @@ -2087,8 +2095,7 @@ bool TargetLowering::SimplifyDemandedBits( return true; // Attempt to avoid multi-use ops if we don't need anything from them. 
- if (!DemandedSrcBits.isAllOnesValue() || - !DemandedSrcElts.isAllOnesValue()) { + if (!DemandedSrcBits.isAllOnes() || !DemandedSrcElts.isAllOnes()) { if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits( Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) { SDValue NewOp = @@ -2138,12 +2145,12 @@ bool TargetLowering::SimplifyDemandedBits( TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = BitWidth / NumSrcEltBits; unsigned NumSrcElts = SrcVT.getVectorNumElements(); - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != Scale; ++i) { unsigned Offset = i * NumSrcEltBits; APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset); - if (!Sub.isNullValue()) { + if (!Sub.isZero()) { DemandedSrcBits |= Sub; for (unsigned j = 0; j != NumElts; ++j) if (DemandedElts[j]) @@ -2164,8 +2171,8 @@ bool TargetLowering::SimplifyDemandedBits( TLO.DAG.getDataLayout().isLittleEndian()) { unsigned Scale = NumSrcEltBits / BitWidth; unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; - APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits); - APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts); + APInt DemandedSrcBits = APInt::getZero(NumSrcEltBits); + APInt DemandedSrcElts = APInt::getZero(NumSrcElts); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Offset = (i % Scale) * BitWidth; @@ -2222,7 +2229,7 @@ bool TargetLowering::SimplifyDemandedBits( } // Attempt to avoid multi-use ops if we don't need anything from them. 
- if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1); SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( @@ -2245,8 +2252,8 @@ bool TargetLowering::SimplifyDemandedBits( // is probably not useful (and could be detrimental). ConstantSDNode *C = isConstOrConstSplat(Op1); APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ); - if (C && !C->isAllOnesValue() && !C->isOne() && - (C->getAPIntValue() | HighMask).isAllOnesValue()) { + if (C && !C->isAllOnes() && !C->isOne() && + (C->getAPIntValue() | HighMask).isAllOnes()) { SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT); // Disable the nsw and nuw flags. We can no longer guarantee that we // won't wrap after simplification. @@ -2344,7 +2351,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, return SDValue(); }; - APInt KnownUndef = APInt::getNullValue(NumElts); + APInt KnownUndef = APInt::getZero(NumElts); for (unsigned i = 0; i != NumElts; ++i) { // If both inputs for this element are either constant or undef and match // the element type, compute the constant/undef result for this element of @@ -2371,7 +2378,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( unsigned NumElts = DemandedElts.getBitWidth(); assert(VT.isVector() && "Expected vector op"); - KnownUndef = KnownZero = APInt::getNullValue(NumElts); + KnownUndef = KnownZero = APInt::getZero(NumElts); // TODO: For now we assume we know nothing about scalable vectors. 
if (VT.isScalableVector()) @@ -2463,17 +2470,13 @@ bool TargetLowering::SimplifyDemandedVectorElts( return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, TLO, Depth + 1); - APInt SrcZero, SrcUndef; - APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts); + APInt SrcDemandedElts, SrcZero, SrcUndef; // Bitcast from 'large element' src vector to 'small element' vector, we // must demand a source element if any DemandedElt maps to it. if ((NumElts % NumSrcElts) == 0) { unsigned Scale = NumElts / NumSrcElts; - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SrcDemandedElts.setBit(i / Scale); - + SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -2483,7 +2486,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // TODO - bigendian once we have test coverage. if (TLO.DAG.getDataLayout().isLittleEndian()) { unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits(); - APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits); + APInt SrcDemandedBits = APInt::getZero(SrcEltSizeInBits); for (unsigned i = 0; i != NumElts; ++i) if (DemandedElts[i]) { unsigned Ofs = (i % Scale) * EltSizeInBits; @@ -2513,10 +2516,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // of this vector. if ((NumSrcElts % NumElts) == 0) { unsigned Scale = NumSrcElts / NumElts; - for (unsigned i = 0; i != NumElts; ++i) - if (DemandedElts[i]) - SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale); - + SrcDemandedElts = APIntOps::ScaleBitMask(DemandedElts, NumSrcElts); if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; @@ -2525,9 +2525,9 @@ bool TargetLowering::SimplifyDemandedVectorElts( // the output element will be as well, assuming it was demanded. 
for (unsigned i = 0; i != NumElts; ++i) { if (DemandedElts[i]) { - if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue()) + if (SrcZero.extractBits(Scale, i * Scale).isAllOnes()) KnownZero.setBit(i); - if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue()) + if (SrcUndef.extractBits(Scale, i * Scale).isAllOnes()) KnownUndef.setBit(i); } } @@ -2536,7 +2536,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } case ISD::BUILD_VECTOR: { // Check all elements and simplify any unused elements with UNDEF. - if (!DemandedElts.isAllOnesValue()) { + if (!DemandedElts.isAllOnes()) { // Don't simplify BROADCASTS. if (llvm::any_of(Op->op_values(), [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) { @@ -2589,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx); APInt DemandedSrcElts = DemandedElts; - DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx); + DemandedSrcElts.insertBits(APInt::getZero(NumSubElts), Idx); APInt SubUndef, SubZero; if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO, @@ -2609,8 +2609,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero.insertBits(SubZero, Idx); // Attempt to avoid multi-use ops if we don't need anything from them. - if (!DemandedSrcElts.isAllOnesValue() || - !DemandedSubElts.isAllOnesValue()) { + if (!DemandedSrcElts.isAllOnes() || !DemandedSubElts.isAllOnes()) { SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( Src, DemandedSrcElts, TLO.DAG, Depth + 1); SDValue NewSub = SimplifyMultipleUseDemandedVectorElts( @@ -2642,7 +2641,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( KnownZero = SrcZero.extractBits(NumElts, Idx); // Attempt to avoid multi-use ops if we don't need anything from them. 
- if (!DemandedElts.isAllOnesValue()) { + if (!DemandedElts.isAllOnes()) { SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( Src, DemandedSrcElts, TLO.DAG, Depth + 1); if (NewSrc) { @@ -2810,6 +2809,25 @@ bool TargetLowering::SimplifyDemandedVectorElts( if (DemandedElts.isSubsetOf(KnownUndef)) return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); KnownUndef.clearAllBits(); + + // zext - if we just need the bottom element then we can mask: + // zext(and(x,c)) -> and(x,c') iff the zext is the only user of the and. + if (DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian() && + Src.getOpcode() == ISD::AND && Op->isOnlyUserOf(Src.getNode()) && + Op.getValueSizeInBits() == Src.getValueSizeInBits()) { + SDLoc DL(Op); + EVT SrcVT = Src.getValueType(); + EVT SrcSVT = SrcVT.getScalarType(); + SmallVector<SDValue> MaskElts; + MaskElts.push_back(TLO.DAG.getAllOnesConstant(DL, SrcSVT)); + MaskElts.append(NumSrcElts - 1, TLO.DAG.getConstant(0, DL, SrcSVT)); + SDValue Mask = TLO.DAG.getBuildVector(SrcVT, DL, MaskElts); + if (SDValue Fold = TLO.DAG.FoldConstantArithmetic( + ISD::AND, DL, SrcVT, {Src.getOperand(1), Mask})) { + Fold = TLO.DAG.getNode(ISD::AND, DL, SrcVT, Src.getOperand(0), Fold); + return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Fold)); + } + } } break; } @@ -2842,7 +2860,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? - if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2869,7 +2887,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? 
- if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2897,7 +2915,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // Attempt to avoid multi-use ops if we don't need anything from them. // TODO - use KnownUndef to relax the demandedelts? - if (!DemandedElts.isAllOnesValue()) + if (!DemandedElts.isAllOnes()) if (SimplifyDemandedVectorEltsBinOp(Op0, Op1)) return true; break; @@ -2923,7 +2941,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( return true; } else { KnownBits Known; - APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits); + APInt DemandedBits = APInt::getAllOnes(EltSizeInBits); if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known, TLO, Depth, AssumeSingleUse)) return true; @@ -3111,9 +3129,9 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { case UndefinedBooleanContent: return CVal[0]; case ZeroOrOneBooleanContent: - return CVal.isOneValue(); + return CVal.isOne(); case ZeroOrNegativeOneBooleanContent: - return CVal.isAllOnesValue(); + return CVal.isAllOnes(); } llvm_unreachable("Invalid boolean contents"); @@ -3140,7 +3158,7 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) return !CN->getAPIntValue()[0]; - return CN->isNullValue(); + return CN->isZero(); } bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, @@ -3156,7 +3174,7 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT, return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1)); case TargetLowering::UndefinedBooleanContent: case TargetLowering::ZeroOrNegativeOneBooleanContent: - return N->isAllOnesValue() && SExt; + return N->isAllOnes() && SExt; } llvm_unreachable("Unexpected enumeration."); } @@ -3210,7 +3228,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, // Bail out if the compare operand 
that we want to turn into a zero is // already a zero (otherwise, infinite loop). auto *YConst = dyn_cast<ConstantSDNode>(Y); - if (YConst && YConst->isNullValue()) + if (YConst && YConst->isZero()) return SDValue(); // Transform this into: ~X & Y == 0. @@ -3325,7 +3343,7 @@ SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const { assert(isConstOrConstSplat(N1C) && - isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + isConstOrConstSplat(N1C)->getAPIntValue().isZero() && "Should be a comparison with 0."); assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Valid only for [in]equality comparisons."); @@ -3548,7 +3566,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. - if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) && + if (N0.getOpcode() == ISD::SRL && (C1.isZero() || C1.isOne()) && N0.getOperand(0).getOpcode() == ISD::CTLZ && isPowerOf2_32(N0.getScalarValueSizeInBits())) { if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) { @@ -3648,8 +3666,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, (isConstFalseVal(N1C) || isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) { - bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) || - (!N1C->isNullValue() && Cond == ISD::SETNE); + bool Inverse = (N1C->isZero() && Cond == ISD::SETEQ) || + (!N1C->isZero() && Cond == ISD::SETNE); if (!Inverse) return TopSetCC; @@ -3800,8 +3818,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Otherwise, make this a use of a zext. 
return DAG.getSetCC(dl, VT, ZextOp, DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond); - } else if ((N1C->isNullValue() || N1C->isOne()) && - (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + } else if ((N1C->isZero() || N1C->isOne()) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) && @@ -3894,7 +3912,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // icmp eq/ne (urem %x, %y), 0 // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem': // icmp eq/ne %x, 0 - if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() && + if (N0.getOpcode() == ISD::UREM && N1C->isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0)); KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1)); @@ -3902,6 +3920,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond); } + // Fold set_cc seteq (ashr X, BW-1), -1 -> set_cc setlt X, 0 + // and set_cc setne (ashr X, BW-1), -1 -> set_cc setge X, 0 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::SRA && isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getConstantOperandAPInt(1) == OpVT.getScalarSizeInBits() - 1 && + N1C && N1C->isAllOnes()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, dl, OpVT), + Cond == ISD::SETEQ ? 
ISD::SETLT : ISD::SETGE); + } + if (SDValue V = optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl)) return V; @@ -4001,7 +4030,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 - if (C1.isNullValue()) + if (C1.isZero()) if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( VT, N0, N1, Cond, DCI, dl)) return CC; @@ -4010,8 +4039,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // For example, when high 32-bits of i64 X are known clear: // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0 // all bits set: (X | (Y<<32)) == -1 --> (X & Y) == -1 - bool CmpZero = N1C->getAPIntValue().isNullValue(); - bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue(); + bool CmpZero = N1C->getAPIntValue().isZero(); + bool CmpNegOne = N1C->getAPIntValue().isAllOnes(); if ((CmpZero || CmpNegOne) && N0.hasOneUse()) { // Match or(lo,shl(hi,bw/2)) pattern. auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) { @@ -4140,7 +4169,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, N0.getOpcode() == ISD::AND && N0.hasOneUse()) { if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { const APInt &AndRHSC = AndRHS->getAPIntValue(); - if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + if (AndRHSC.isNegatedPowerOf2() && (AndRHSC & C1) == C1) { unsigned ShiftBits = AndRHSC.countTrailingZeros(); if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) { SDValue Shift = @@ -4336,7 +4365,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. 
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) { + if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) { if (N0.getOpcode() == ISD::UREM) { if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl)) return Folded; @@ -4687,7 +4716,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL, getSimpleValueType(DL, STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getSimpleValueType(DL, Call.getType()); + OpInfo.ConstraintVT = + getAsmOperandValueType(DL, Call.getType()).getSimpleVT(); } ++ResNo; break; @@ -5049,7 +5079,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, SmallVector<SDValue, 16> Shifts, Factors; auto BuildSDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; APInt Divisor = C->getAPIntValue(); unsigned Shift = Divisor.countTrailingZeros(); @@ -5151,31 +5181,31 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks; auto BuildSDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; const APInt &Divisor = C->getAPIntValue(); - APInt::ms magics = Divisor.magic(); + SignedDivisionByConstantInfo magics = SignedDivisionByConstantInfo::get(Divisor); int NumeratorFactor = 0; int ShiftMask = -1; - if (Divisor.isOneValue() || Divisor.isAllOnesValue()) { + if (Divisor.isOne() || Divisor.isAllOnes()) { // If d is +1/-1, we just multiply the numerator by +1/-1. NumeratorFactor = Divisor.getSExtValue(); - magics.m = 0; - magics.s = 0; + magics.Magic = 0; + magics.ShiftAmount = 0; ShiftMask = 0; - } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { + } else if (Divisor.isStrictlyPositive() && magics.Magic.isNegative()) { // If d > 0 and m < 0, add the numerator. 
NumeratorFactor = 1; - } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { + } else if (Divisor.isNegative() && magics.Magic.isStrictlyPositive()) { // If d < 0 and m > 0, subtract the numerator. NumeratorFactor = -1; } - MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT)); + MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT)); Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT)); - Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT)); + Shifts.push_back(DAG.getConstant(magics.ShiftAmount, dl, ShSVT)); ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT)); return true; }; @@ -5296,33 +5326,33 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; auto BuildUDIVPattern = [&](ConstantSDNode *C) { - if (C->isNullValue()) + if (C->isZero()) return false; // FIXME: We should use a narrower constant when the upper // bits are known to be zero. const APInt& Divisor = C->getAPIntValue(); - APInt::mu magics = Divisor.magicu(); + UnsignedDivisonByConstantInfo magics = UnsignedDivisonByConstantInfo::get(Divisor); unsigned PreShift = 0, PostShift = 0; // If the divisor is even, we can avoid using the expensive fixup by // shifting the divided value upfront. - if (magics.a != 0 && !Divisor[0]) { + if (magics.IsAdd != 0 && !Divisor[0]) { PreShift = Divisor.countTrailingZeros(); // Get magic number for the shifted divisor. 
- magics = Divisor.lshr(PreShift).magicu(PreShift); - assert(magics.a == 0 && "Should use cheap fixup now"); + magics = UnsignedDivisonByConstantInfo::get(Divisor.lshr(PreShift), PreShift); + assert(magics.IsAdd == 0 && "Should use cheap fixup now"); } - APInt Magic = magics.m; + APInt Magic = magics.Magic; unsigned SelNPQ; - if (magics.a == 0 || Divisor.isOneValue()) { - assert(magics.s < Divisor.getBitWidth() && + if (magics.IsAdd == 0 || Divisor.isOne()) { + assert(magics.ShiftAmount < Divisor.getBitWidth() && "We shouldn't generate an undefined shift!"); - PostShift = magics.s; + PostShift = magics.ShiftAmount; SelNPQ = false; } else { - PostShift = magics.s - 1; + PostShift = magics.ShiftAmount - 1; SelNPQ = true; } @@ -5330,7 +5360,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT)); NPQFactors.push_back( DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1) - : APInt::getNullValue(EltBits), + : APInt::getZero(EltBits), dl, SVT)); PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT)); UseNPQ |= SelNPQ; @@ -5510,13 +5540,13 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (CDiv->isNullValue()) + if (CDiv->isZero()) return false; const APInt &D = CDiv->getAPIntValue(); const APInt &Cmp = CCmp->getAPIntValue(); - ComparingWithAllZeros &= Cmp.isNullValue(); + ComparingWithAllZeros &= Cmp.isZero(); // x u% C1` is *always* less than C1. So given `x u% C1 == C2`, // if C2 is not less than C1, the comparison is always false. @@ -5528,26 +5558,26 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, // If all lanes are tautological (either all divisors are ones, or divisor // is not greater than the constant we are comparing with), // we will prefer to avoid the fold. 
- bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane; + bool TautologicalLane = D.isOne() || TautologicalInvertedLane; HadTautologicalLanes |= TautologicalLane; AllLanesAreTautological &= TautologicalLane; // If we are comparing with non-zero, we need'll need to subtract said // comparison value from the LHS. But there is no point in doing that if // every lane where we are comparing with non-zero is tautological.. - if (!Cmp.isNullValue()) + if (!Cmp.isZero()) AllComparisonsWithNonZerosAreTautological &= TautologicalLane; // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); - assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate."); APInt D0 = D.lshr(K); // D is even if it has trailing zeros. HadEvenDivisor |= (K != 0); // D is a power-of-two if D0 is one. // If all divisors are power-of-two, we will prefer to avoid the fold. - AllDivisorsArePowerOfTwo &= D0.isOneValue(); + AllDivisorsArePowerOfTwo &= D0.isOne(); // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. @@ -5555,20 +5585,20 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, APInt P = D0.zext(W + 1) .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); // Q = floor((2^W - 1) u/ D) // R = ((2^W - 1) u% D) APInt Q, R; - APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R); + APInt::udivrem(APInt::getAllOnes(W), D, Q, R); // If we are comparing with zero, then that comparison constant is okay, // else it may need to be one less than that. 
if (Cmp.ugt(R)) Q -= 1; - assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); // If the lane is tautological the result can be constant-folded. @@ -5751,7 +5781,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // TODO: Could support comparing with non-zero too. ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode); - if (!CompTarget || !CompTarget->isNullValue()) + if (!CompTarget || !CompTarget->isZero()) return SDValue(); bool HadIntMinDivisor = false; @@ -5764,7 +5794,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, auto BuildSREMPattern = [&](ConstantSDNode *C) { // Division by 0 is UB. Leave it to be constant-folded elsewhere. - if (C->isNullValue()) + if (C->isZero()) return false; // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine. @@ -5777,12 +5807,12 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, HadIntMinDivisor |= D.isMinSignedValue(); // If all divisors are ones, we will prefer to avoid the fold. - HadOneDivisor |= D.isOneValue(); - AllDivisorsAreOnes &= D.isOneValue(); + HadOneDivisor |= D.isOne(); + AllDivisorsAreOnes &= D.isOne(); // Decompose D into D0 * 2^K unsigned K = D.countTrailingZeros(); - assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate."); + assert((!D.isOne() || (K == 0)) && "For divisor '1' we won't rotate."); APInt D0 = D.lshr(K); if (!D.isMinSignedValue()) { @@ -5793,7 +5823,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // D is a power-of-two if D0 is one. This includes INT_MIN. // If all divisors are power-of-two, we will prefer to avoid the fold. - AllDivisorsArePowerOfTwo &= D0.isOneValue(); + AllDivisorsArePowerOfTwo &= D0.isOne(); // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. 
@@ -5801,8 +5831,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, APInt P = D0.zext(W + 1) .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) .trunc(W); - assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable - assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check."); + assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + assert((D0 * P).isOne() && "Multiplicative inverse sanity check."); // A = floor((2^(W - 1) - 1) / D0) & -2^K APInt A = APInt::getSignedMaxValue(W).udiv(D0); @@ -5817,14 +5847,14 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // Q = floor((2 * A) / (2^K)) APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K)); - assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) && + assert(APInt::getAllOnes(SVT.getSizeInBits()).ugt(A) && "We are expecting that A is always less than all-ones for SVT"); - assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) && + assert(APInt::getAllOnes(ShSVT.getSizeInBits()).ugt(K) && "We are expecting that K is always less than all-ones for ShSVT"); // If the divisor is 1 the result can be constant-folded. Likewise, we // don't care about INT_MIN lanes, those can be set to undef if appropriate. - if (D.isOneValue()) { + if (D.isOne()) { // Set P, A and K to a bogus values so we can try to splat them. P = 0; A = -1; @@ -5950,7 +5980,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue IntMax = DAG.getConstant( APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT); SDValue Zero = - DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT); + DAG.getConstant(APInt::getZero(SVT.getScalarSizeInBits()), DL, VT); // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded. 
SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ); @@ -6776,7 +6806,7 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result, // the destination signmask can't be represented by the float, so we can // just use FP_TO_SINT directly. const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT); - APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits())); + APFloat APF(APFSem, APInt::getZero(SrcVT.getScalarSizeInBits())); APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits()); if (APFloat::opOverflow & APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) { @@ -6969,8 +6999,18 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node, return SDValue(); } -bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +// Only expand vector types if we have the appropriate vector bit operations. +static bool canExpandVectorCTPOP(const TargetLowering &TLI, EVT VT) { + assert(VT.isVector() && "Expected vector type"); + unsigned Len = VT.getScalarSizeInBits(); + return TLI.isOperationLegalOrCustom(ISD::ADD, VT) && + TLI.isOperationLegalOrCustom(ISD::SUB, VT) && + TLI.isOperationLegalOrCustom(ISD::SRL, VT) && + (Len == 8 || TLI.isOperationLegalOrCustom(ISD::MUL, VT)) && + TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT); +} + +SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -6980,15 +7020,11 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, // TODO: Add support for irregular type lengths. if (!(Len <= 128 && Len % 8 == 0)) - return false; + return SDValue(); // Only expand vector types if we have the appropriate vector bit operations. 
- if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) || - !isOperationLegalOrCustom(ISD::SUB, VT) || - !isOperationLegalOrCustom(ISD::SRL, VT) || - (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) || - !isOperationLegalOrCustomOrPromote(ISD::AND, VT))) - return false; + if (VT.isVector() && !canExpandVectorCTPOP(*this, VT)) + return SDValue(); // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel @@ -7025,12 +7061,10 @@ bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result, DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), DAG.getConstant(Len - 8, dl, ShVT)); - Result = Op; - return true; + return Op; } -bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7039,10 +7073,8 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, // If the non-ZERO_UNDEF version is supported we can use that instead. if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF && - isOperationLegalOrCustom(ISD::CTLZ, VT)) { - Result = DAG.getNode(ISD::CTLZ, dl, VT, Op); - return true; - } + isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::CTLZ, dl, VT, Op); // If the ZERO_UNDEF version is supported use that and handle the zero case. 
if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) { @@ -7051,17 +7083,18 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + return DAG.getSelect(dl, VT, SrcIsZero, DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ); - return true; } // Only expand vector types if we have the appropriate vector bit operations. + // This includes the operations needed to expand CTPOP if it isn't supported. if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || - !isOperationLegalOrCustom(ISD::CTPOP, VT) || + (!isOperationLegalOrCustom(ISD::CTPOP, VT) && + !canExpandVectorCTPOP(*this, VT)) || !isOperationLegalOrCustom(ISD::SRL, VT) || !isOperationLegalOrCustomOrPromote(ISD::OR, VT))) - return false; + return SDValue(); // for now, we do this: // x = x | (x >> 1); @@ -7078,12 +7111,10 @@ bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result, DAG.getNode(ISD::SRL, dl, VT, Op, Tmp)); } Op = DAG.getNOT(dl, Op, VT); - Result = DAG.getNode(ISD::CTPOP, dl, VT, Op); - return true; + return DAG.getNode(ISD::CTPOP, dl, VT, Op); } -bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, - SelectionDAG &DAG) const { +SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); SDValue Op = Node->getOperand(0); @@ -7091,10 +7122,8 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, // If the non-ZERO_UNDEF version is supported we can use that instead. 
if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF && - isOperationLegalOrCustom(ISD::CTTZ, VT)) { - Result = DAG.getNode(ISD::CTTZ, dl, VT, Op); - return true; - } + isOperationLegalOrCustom(ISD::CTTZ, VT)) + return DAG.getNode(ISD::CTTZ, dl, VT, Op); // If the ZERO_UNDEF version is supported use that and handle the zero case. if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) { @@ -7103,19 +7132,20 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op); SDValue Zero = DAG.getConstant(0, dl, VT); SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ); - Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero, + return DAG.getSelect(dl, VT, SrcIsZero, DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ); - return true; } // Only expand vector types if we have the appropriate vector bit operations. + // This includes the operations needed to expand CTPOP if it isn't supported. if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) || (!isOperationLegalOrCustom(ISD::CTPOP, VT) && - !isOperationLegalOrCustom(ISD::CTLZ, VT)) || + !isOperationLegalOrCustom(ISD::CTLZ, VT) && + !canExpandVectorCTPOP(*this, VT)) || !isOperationLegalOrCustom(ISD::SUB, VT) || !isOperationLegalOrCustomOrPromote(ISD::AND, VT) || !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) - return false; + return SDValue(); // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: @@ -7127,18 +7157,15 @@ bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result, // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. 
if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) { - Result = - DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), - DAG.getNode(ISD::CTLZ, dl, VT, Tmp)); - return true; + return DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp)); } - Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp); - return true; + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp); } -bool TargetLowering::expandABS(SDNode *N, SDValue &Result, - SelectionDAG &DAG, bool IsNegative) const { +SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, + bool IsNegative) const { SDLoc dl(N); EVT VT = N->getValueType(0); EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); @@ -7148,27 +7175,24 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMAX, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::SMAX, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::SMAX, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // abs(x) -> umin(x,sub(0,x)) if (!IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::UMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::UMIN, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::UMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // 0 - abs(x) -> smin(x, sub(0,x)) if (IsNegative && isOperationLegal(ISD::SUB, VT) && isOperationLegal(ISD::SMIN, VT)) { SDValue Zero = DAG.getConstant(0, dl, VT); - Result = DAG.getNode(ISD::SMIN, dl, VT, Op, - DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); - return true; + return DAG.getNode(ISD::SMIN, dl, VT, Op, + DAG.getNode(ISD::SUB, dl, VT, Zero, Op)); } // Only expand vector types if we have the appropriate vector operations. 
@@ -7177,20 +7201,19 @@ bool TargetLowering::expandABS(SDNode *N, SDValue &Result, (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) || (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) || !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) - return false; + return SDValue(); SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, Op, DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT)); if (!IsNegative) { SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift); - Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift); - } else { - // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) - SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); - Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); + return DAG.getNode(ISD::XOR, dl, VT, Add, Shift); } - return true; + + // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y)) + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift); + return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor); } SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { @@ -7265,34 +7288,31 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const { // TODO: We can easily support i4/i2 legal types if any target ever does. if (Sz >= 8 && isPowerOf2_32(Sz)) { // Create the masks - repeating the pattern every byte. - APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0)); - APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC)); - APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA)); - APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F)); - APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33)); - APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55)); + APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F)); + APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33)); + APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55)); // BSWAP if the type is wider than a single byte. Tmp = (Sz > 8 ? 
DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op); - // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT)); + // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask4, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT)); + // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask2, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT)); Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); - // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1) - Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT)); + // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Mask1, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT)); Tmp3 = 
DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT)); Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); return Tmp; @@ -7802,13 +7822,15 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx, EVT VecVT, const SDLoc &dl, - unsigned NumSubElts) { - if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx)) - return Idx; + ElementCount SubEC) { + assert(!(SubEC.isScalable() && VecVT.isFixedLengthVector()) && + "Cannot index a scalable vector within a fixed-width vector"); - EVT IdxVT = Idx.getValueType(); unsigned NElts = VecVT.getVectorMinNumElements(); - if (VecVT.isScalableVector()) { + unsigned NumSubElts = SubEC.getKnownMinValue(); + EVT IdxVT = Idx.getValueType(); + + if (VecVT.isScalableVector() && !SubEC.isScalable()) { // If this is a constant index and we know the value plus the number of the // elements in the subvector minus one is less than the minimum number of // elements then it's safe to return Idx. @@ -7855,16 +7877,16 @@ SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG, unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size. 
assert(EltSize * 8 == EltVT.getFixedSizeInBits() && "Converting bits to bytes lost precision"); - - // Scalable vectors don't need clamping as these are checked at compile time - if (SubVecVT.isFixedLengthVector()) { - assert(SubVecVT.getVectorElementType() == EltVT && - "Sub-vector must be a fixed vector with matching element type"); - Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, - SubVecVT.getVectorNumElements()); - } + assert(SubVecVT.getVectorElementType() == EltVT && + "Sub-vector must be a vector with matching element type"); + Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl, + SubVecVT.getVectorElementCount()); EVT IdxVT = Index.getValueType(); + if (SubVecVT.isScalableVector()) + Index = + DAG.getNode(ISD::MUL, dl, IdxVT, Index, + DAG.getVScale(dl, IdxVT, APInt(IdxVT.getSizeInBits(), 1))); Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index, DAG.getConstant(EltSize, dl, IdxVT)); @@ -7920,7 +7942,7 @@ SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op, ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDLoc dl(Op); if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { - if (C->isNullValue() && CC == ISD::SETEQ) { + if (C->isZero() && CC == ISD::SETEQ) { EVT VT = Op.getOperand(0).getValueType(); SDValue Zext = Op.getOperand(0); if (VT.bitsLT(MVT::i32)) { @@ -7948,10 +7970,8 @@ TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT, (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED); // Scaling is unimportant for bytes, canonicalize to unscaled. - if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) { - IsScaledIndex = false; - IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; - } + if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) + return IsSignedIndex ? 
ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED; return IndexType; } @@ -8072,14 +8092,12 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const { return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff); } - // SatMax -> Overflow && SumDiff < 0 - // SatMin -> Overflow && SumDiff >= 0 + // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff APInt MinVal = APInt::getSignedMinValue(BitWidth); - APInt MaxVal = APInt::getSignedMaxValue(BitWidth); SDValue SatMin = DAG.getConstant(MinVal, dl, VT); - SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin); + SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff, + DAG.getConstant(BitWidth - 1, dl, VT)); + Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin); return DAG.getSelect(dl, VT, Overflow, Result, SumDiff); } @@ -8154,8 +8172,11 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { APInt MaxVal = APInt::getSignedMaxValue(VTSize); SDValue SatMin = DAG.getConstant(MinVal, dl, VT); SDValue SatMax = DAG.getConstant(MaxVal, dl, VT); - SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT); - Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin); + // Xor the inputs, if resulting sign bit is 0 the product will be + // positive, else negative. + SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS); + SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Xor, Zero, ISD::SETLT); + Result = DAG.getSelect(dl, VT, ProdNeg, SatMin, SatMax); return DAG.getSelect(dl, VT, Overflow, Result, Product); } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) { SDValue Result = @@ -8390,7 +8411,7 @@ void TargetLowering::expandSADDSUBO( // If SADDSAT/SSUBSAT is legal, compare results to detect overflow. unsigned OpcSat = IsAdd ? 
ISD::SADDSAT : ISD::SSUBSAT; - if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) { + if (isOperationLegal(OpcSat, LHS.getValueType())) { SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS); SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE); Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType); @@ -8443,8 +8464,8 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result, EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); if (VT.isVector()) - WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT, - VT.getVectorNumElements()); + WideVT = + EVT::getVectorVT(*DAG.getContext(), WideVT, VT.getVectorElementCount()); SDValue BottomHalf; SDValue TopHalf; |
