Diffstat (limited to 'llvm/lib/CodeGen/SelectionDAG')
24 files changed, 5946 insertions, 2409 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 654879115ff9..0a3ebd73d272 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -20,8 +20,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntervalMap.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" @@ -69,9 +67,11 @@ #include <cstdint> #include <functional> #include <iterator> +#include <optional> #include <string> #include <tuple> #include <utility> +#include <variant> using namespace llvm; @@ -135,6 +135,11 @@ static cl::opt<bool> EnableShrinkLoadReplaceStoreWithStore( cl::desc("DAG combiner enable load/<replace bytes>/store with " "a narrower store")); +static cl::opt<bool> EnableVectorFCopySignExtendRound( + "combiner-vector-fcopysign-extend-round", cl::Hidden, cl::init(false), + cl::desc( + "Enable merging extends and rounds into FCOPYSIGN on vector types")); + namespace { class DAGCombiner { @@ -246,8 +251,8 @@ namespace { for (MVT VT : MVT::all_valuetypes()) if (EVT(VT).isSimple() && VT != MVT::Other && TLI.isTypeLegal(EVT(VT)) && - VT.getSizeInBits().getKnownMinSize() >= MaximumLegalStoreInBits) - MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinSize(); + VT.getSizeInBits().getKnownMinValue() >= MaximumLegalStoreInBits) + MaximumLegalStoreInBits = VT.getSizeInBits().getKnownMinValue(); } void ConsiderForPruning(SDNode *N) { @@ -382,6 +387,10 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); + SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, + ISD::CondCode CC); + /// Call the node-specific routine that knows how to fold each /// particular type of node. If that doesn't do anything, try the /// target-specific DAG combines. 
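For context on the new combineMinNumMaxNum declaration added in the hunk above: its implementation appears later in this diff, where a select of a floating-point compare is rewritten into a single FMINNUM/FMAXNUM-style node. Below is a standalone sketch of the scalar identity being exploited, in plain C++ rather than SelectionDAG code; the function names are made up for illustration and only the arithmetic equivalence is asserted.

// Standalone illustration (not DAGCombiner code) of the select-of-compare
// shape that a combineMinNumMaxNum-style fold turns into one min/max node.
#include <cassert>
#include <cmath>

// Source pattern: select (setcc x, y, olt), x, y
static double select_form(double x, double y) { return x < y ? x : y; }

// Folded form: a single minnum-style operation (std::fmin gives the same
// result for the NaN-free inputs exercised here).
static double folded_form(double x, double y) { return std::fmin(x, y); }

int main() {
  for (double x : {-2.0, 0.0, 3.5})
    for (double y : {-1.0, 0.5, 3.5})
      assert(select_form(x, y) == folded_form(x, y));
  return 0;
}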
@@ -434,6 +443,7 @@ namespace { SDValue visitOR(SDNode *N); SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N); SDValue visitXOR(SDNode *N); + SDValue SimplifyVCastOp(SDNode *N, const SDLoc &DL); SDValue SimplifyVBinOp(SDNode *N, const SDLoc &DL); SDValue visitSHL(SDNode *N); SDValue visitSRA(SDNode *N); @@ -494,6 +504,8 @@ namespace { SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); SDValue replaceStoreOfFPConstant(StoreSDNode *ST); + bool refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(SDNode *N); + SDValue visitSTORE(SDNode *N); SDValue visitLIFETIME_END(SDNode *N); SDValue visitINSERT_VECTOR_ELT(SDNode *N); @@ -508,6 +520,8 @@ namespace { SDValue visitMSTORE(SDNode *N); SDValue visitMGATHER(SDNode *N); SDValue visitMSCATTER(SDNode *N); + SDValue visitVPGATHER(SDNode *N); + SDValue visitVPSCATTER(SDNode *N); SDValue visitFP_TO_FP16(SDNode *N); SDValue visitFP16_TO_FP(SDNode *N); SDValue visitFP_TO_BF16(SDNode *N); @@ -551,6 +565,7 @@ namespace { SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1, const SDLoc &DL); SDValue foldSubToUSubSat(EVT DstVT, SDNode *N); + SDValue foldABSToABD(SDNode *N); SDValue unfoldMaskedMerge(SDNode *N); SDValue unfoldExtremeBitClearingToShifts(SDNode *N); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, @@ -567,6 +582,7 @@ namespace { SDValue CombineExtLoad(SDNode *N); SDValue CombineZExtLogicopShiftLoad(SDNode *N); SDValue combineRepeatedFPDivisors(SDNode *N); + SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex); SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex); SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); SDValue BuildSDIV(SDNode *N); @@ -602,6 +618,7 @@ namespace { SDValue splitMergedValStore(StoreSDNode *ST); SDValue TransformFPLoadStorePair(SDNode *N); SDValue convertBuildVecZextToZext(SDNode *N); + SDValue convertBuildVecZextToBuildVecWithZeros(SDNode *N); SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); SDValue reduceBuildVecTruncToBitCast(SDNode *N); SDValue reduceBuildVecToShuffle(SDNode *N); @@ -1204,19 +1221,14 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.dump(&DAG); dbgs() << "\nWith: "; TLO.New.dump(&DAG); dbgs() << '\n'); - // Replace all uses. If any nodes become isomorphic to other nodes and - // are deleted, make sure to remove them from our worklist. - WorklistRemover DeadNodes(*this); + // Replace all uses. DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); // Push the new node and any (possibly new) users onto the worklist. AddToWorklistWithUsers(TLO.New.getNode()); - // Finally, if the node is now dead, remove it from the graph. The node - // may not be dead if the replacement process recursively simplified to - // something else needing this node. - if (TLO.Old->use_empty()) - deleteAndRecombine(TLO.Old.getNode()); + // Finally, if the node is now dead, remove it from the graph. 
+ recursivelyDeleteUnusedNodes(TLO.Old.getNode()); } /// Check the specified integer node value to see if it can be simplified or if @@ -1263,11 +1275,12 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: "; Trunc.dump(&DAG); dbgs() << '\n'); - WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); - deleteAndRecombine(Load); + AddToWorklist(Trunc.getNode()); + recursivelyDeleteUnusedNodes(Load); } SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { @@ -1522,13 +1535,15 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: "; Result.dump(&DAG); dbgs() << '\n'); - WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); - deleteAndRecombine(N); + AddToWorklist(Result.getNode()); + recursivelyDeleteUnusedNodes(N); return true; } + return false; } @@ -1746,7 +1761,8 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::AssertAlign: return visitAssertAlign(N); case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); case ISD::SIGN_EXTEND_VECTOR_INREG: - case ISD::ZERO_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N); + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: return visitEXTEND_VECTOR_INREG(N); case ISD::TRUNCATE: return visitTRUNCATE(N); case ISD::BITCAST: return visitBITCAST(N); case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); @@ -1964,7 +1980,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Changed = true; break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; default: // Only add if it isn't already in the list. @@ -2187,54 +2203,29 @@ static SDValue foldSelectWithIdentityConstant(SDNode *N, SelectionDAG &DAG, std::swap(N0, N1); // TODO: Should this apply to scalar select too? - if (!N1.hasOneUse() || N1.getOpcode() != ISD::VSELECT) + if (N1.getOpcode() != ISD::VSELECT || !N1.hasOneUse()) return SDValue(); + // We can't hoist div/rem because of immediate UB (not speculatable). unsigned Opcode = N->getOpcode(); + if (!DAG.isSafeToSpeculativelyExecute(Opcode)) + return SDValue(); + EVT VT = N->getValueType(0); SDValue Cond = N1.getOperand(0); SDValue TVal = N1.getOperand(1); SDValue FVal = N1.getOperand(2); - // TODO: The cases should match with IR's ConstantExpr::getBinOpIdentity(). - // TODO: Target-specific opcodes could be added. Ex: "isCommutativeBinOp()". - // TODO: With fast-math (NSZ), allow the opposite-sign form of zero? - auto isIdentityConstantForOpcode = [](unsigned Opcode, SDValue V) { - if (ConstantFPSDNode *C = isConstOrConstSplatFP(V)) { - switch (Opcode) { - case ISD::FADD: // X + -0.0 --> X - return C->isZero() && C->isNegative(); - case ISD::FSUB: // X - 0.0 --> X - return C->isZero() && !C->isNegative(); - case ISD::FMUL: // X * 1.0 --> X - case ISD::FDIV: // X / 1.0 --> X - return C->isExactlyValue(1.0); - } - } - if (ConstantSDNode *C = isConstOrConstSplat(V)) { - switch (Opcode) { - case ISD::ADD: // X + 0 --> X - case ISD::SUB: // X - 0 --> X - case ISD::SHL: // X << 0 --> X - case ISD::SRA: // X s>> 0 --> X - case ISD::SRL: // X u>> 0 --> X - return C->isZero(); - case ISD::MUL: // X * 1 --> X - return C->isOne(); - } - } - return false; - }; - // This transform increases uses of N0, so freeze it to be safe. 
// binop N0, (vselect Cond, IDC, FVal) --> vselect Cond, N0, (binop N0, FVal) - if (isIdentityConstantForOpcode(Opcode, TVal)) { + unsigned OpNo = ShouldCommuteOperands ? 0 : 1; + if (isNeutralConstant(Opcode, N->getFlags(), TVal, OpNo)) { SDValue F0 = DAG.getFreeze(N0); SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, FVal, N->getFlags()); return DAG.getSelect(SDLoc(N), VT, Cond, F0, NewBO); } // binop N0, (vselect Cond, TVal, IDC) --> vselect Cond, (binop N0, TVal), N0 - if (isIdentityConstantForOpcode(Opcode, FVal)) { + if (isNeutralConstant(Opcode, N->getFlags(), FVal, OpNo)) { SDValue F0 = DAG.getFreeze(N0); SDValue NewBO = DAG.getNode(Opcode, SDLoc(N), VT, F0, TVal, N->getFlags()); return DAG.getSelect(SDLoc(N), VT, Cond, NewBO, F0); @@ -2289,8 +2280,8 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { // or X, (select Cond, -1, 0) --> select Cond, -1, X bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) && - (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) && - (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF)); + ((isNullOrNullSplat(CT) && isAllOnesOrAllOnesSplat(CF)) || + (isNullOrNullSplat(CF) && isAllOnesOrAllOnesSplat(CT))); SDValue CBO = BO->getOperand(SelOpNo ^ 1); if (!CanFoldNonConst && @@ -2298,23 +2289,41 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { !DAG.isConstantFPBuildVectorOrConstantFP(CBO)) return SDValue(); - // We have a select-of-constants followed by a binary operator with a - // constant. Eliminate the binop by pulling the constant math into the select. - // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO SDLoc DL(Sel); - SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT) - : DAG.getNode(BinOpcode, DL, VT, CT, CBO); - if (!CanFoldNonConst && !NewCT.isUndef() && - !isConstantOrConstantVector(NewCT, true) && - !DAG.isConstantFPBuildVectorOrConstantFP(NewCT)) - return SDValue(); + SDValue NewCT, NewCF; - SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) - : DAG.getNode(BinOpcode, DL, VT, CF, CBO); - if (!CanFoldNonConst && !NewCF.isUndef() && - !isConstantOrConstantVector(NewCF, true) && - !DAG.isConstantFPBuildVectorOrConstantFP(NewCF)) - return SDValue(); + if (CanFoldNonConst) { + // If CBO is an opaque constant, we can't rely on getNode to constant fold. + if ((BinOpcode == ISD::AND && isNullOrNullSplat(CT)) || + (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CT))) + NewCT = CT; + else + NewCT = CBO; + + if ((BinOpcode == ISD::AND && isNullOrNullSplat(CF)) || + (BinOpcode == ISD::OR && isAllOnesOrAllOnesSplat(CF))) + NewCF = CF; + else + NewCF = CBO; + } else { + // We have a select-of-constants followed by a binary operator with a + // constant. Eliminate the binop by pulling the constant math into the + // select. Example: add (select Cond, CT, CF), CBO --> select Cond, CT + + // CBO, CF + CBO + NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT) + : DAG.getNode(BinOpcode, DL, VT, CT, CBO); + if (!CanFoldNonConst && !NewCT.isUndef() && + !isConstantOrConstantVector(NewCT, true) && + !DAG.isConstantFPBuildVectorOrConstantFP(NewCT)) + return SDValue(); + + NewCF = SelOpNo ? 
DAG.getNode(BinOpcode, DL, VT, CBO, CF) + : DAG.getNode(BinOpcode, DL, VT, CF, CBO); + if (!CanFoldNonConst && !NewCF.isUndef() && + !isConstantOrConstantVector(NewCF, true) && + !DAG.isConstantFPBuildVectorOrConstantFP(NewCF)) + return SDValue(); + } SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF); SelectOp->setFlags(BO->getFlags()); @@ -2668,9 +2677,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // fold a+vscale(c1)+vscale(c2) -> a+vscale(c1+c2) - if ((N0.getOpcode() == ISD::ADD) && - (N0.getOperand(1).getOpcode() == ISD::VSCALE) && - (N1.getOpcode() == ISD::VSCALE)) { + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::VSCALE && + N1.getOpcode() == ISD::VSCALE) { const APInt &VS0 = N0.getOperand(1)->getConstantOperandAPInt(0); const APInt &VS1 = N1->getConstantOperandAPInt(0); SDValue VS = DAG.getVScale(DL, VT, VS0 + VS1); @@ -2687,9 +2696,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { } // Fold a + step_vector(c1) + step_vector(c2) to a + step_vector(c1+c2) - if ((N0.getOpcode() == ISD::ADD) && - (N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR) && - (N1.getOpcode() == ISD::STEP_VECTOR)) { + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::STEP_VECTOR && + N1.getOpcode() == ISD::STEP_VECTOR) { const APInt &SV0 = N0.getOperand(1)->getConstantOperandAPInt(0); const APInt &SV1 = N1->getConstantOperandAPInt(0); APInt NewStep = SV0 + SV1; @@ -2789,16 +2798,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) { /// the opcode and bypass the mask operation. static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, SelectionDAG &DAG, const SDLoc &DL) { + if (N1.getOpcode() == ISD::ZERO_EXTEND) + N1 = N1.getOperand(0); + if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1))) return SDValue(); EVT VT = N0.getValueType(); - if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits()) + SDValue N10 = N1.getOperand(0); + if (N10.getValueType() != VT && N10.getOpcode() == ISD::TRUNCATE) + N10 = N10.getOperand(0); + + if (N10.getValueType() != VT) + return SDValue(); + + if (DAG.ComputeNumSignBits(N10) != VT.getScalarSizeInBits()) return SDValue(); // add N0, (and (AssertSext X, i1), 1) --> sub N0, X // sub N0, (and (AssertSext X, i1), 1) --> add N0, X - return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0)); + return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N10); } /// Helper for doing combines based on N0 and N1 being added to each other. @@ -3079,6 +3098,17 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) { if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N)) return Combined; + // We want to avoid useless duplication. + // TODO: This is done automatically for binary operations. As ADDCARRY is + // not a binary operation, this is not really possible to leverage this + // existing mechanism for it. However, if more operations require the same + // deduplication logic, then it may be worth generalize. + SDValue Ops[] = {N1, N0, CarryIn}; + SDNode *CSENode = + DAG.getNodeIfExists(ISD::ADDCARRY, N->getVTList(), Ops, N->getFlags()); + if (CSENode) + return SDValue(CSENode, 0); + return SDValue(); } @@ -3110,7 +3140,7 @@ SDValue DAGCombiner::visitSADDO_CARRY(SDNode *N) { * (addcarry X, 0, (addcarry A, B, Z):Carry) * * The end result is usually an increase in operation required, but because the - * carry is now linearized, other tranforms can kick in and optimize the DAG. 
+ * carry is now linearized, other transforms can kick in and optimize the DAG. * * Patterns typically look something like * (uaddo A, B) @@ -3492,11 +3522,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (VT.isVector()) { SDValue N1S = DAG.getSplatValue(N1, true); if (N1S && N1S.getOpcode() == ISD::SUB && - isNullConstant(N1S.getOperand(0))) { - if (VT.isScalableVector()) - return DAG.getSplatVector(VT, DL, N1S.getOperand(1)); - return DAG.getSplatBuildVector(VT, DL, N1S.getOperand(1)); - } + isNullConstant(N1S.getOperand(0))) + return DAG.getSplat(VT, DL, N1S.getOperand(1)); } } @@ -3625,7 +3652,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return V; // (x - y) - 1 -> add (xor y, -1), x - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) { + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isOneOrOneSplat(N1)) { SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0)); @@ -3642,26 +3669,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // Hoist one-use addition by non-opaque constant: // (x + C) - y -> (x - y) + C - if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD && + if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1)); } // y - (x + C) -> (y - x) - C - if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD && + if (N1.getOpcode() == ISD::ADD && N1.hasOneUse() && isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1)); } // (x - C) - y -> (x - y) - C // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors. 
- if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) { SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1); return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1)); } // (C - x) - y -> C - (x + y) - if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && + if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() && isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) { SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1); return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add); @@ -3716,7 +3743,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } // canonicalize (sub X, (vscale * C)) to (add X, (vscale * -C)) - if (N1.getOpcode() == ISD::VSCALE) { + if (N1.getOpcode() == ISD::VSCALE && N1.hasOneUse()) { const APInt &IntVal = N1.getConstantOperandAPInt(0); return DAG.getNode(ISD::ADD, DL, VT, N0, DAG.getVScale(DL, VT, -IntVal)); } @@ -3749,6 +3776,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { return DAG.getNode(ISD::ADD, DL, VT, N1, N0); } + // (sub (subcarry X, 0, Carry), Y) -> (subcarry X, Y, Carry) + if (N0.getOpcode() == ISD::SUBCARRY && isNullConstant(N0.getOperand(1)) && + N0.getResNo() == 0 && N0.hasOneUse()) + return DAG.getNode(ISD::SUBCARRY, DL, N0->getVTList(), + N0.getOperand(0), N1, N0.getOperand(2)); + if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) { // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry) if (SDValue Carry = getAsCarry(TLI, N0)) { @@ -3772,6 +3805,24 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { } } + // max(a,b) - min(a,b) --> abd(a,b) + auto MatchSubMaxMin = [&](unsigned Max, unsigned Min, unsigned Abd) { + if (N0.getOpcode() != Max || N1.getOpcode() != Min) + return SDValue(); + if ((N0.getOperand(0) != N1.getOperand(0) || + N0.getOperand(1) != N1.getOperand(1)) && + (N0.getOperand(0) != N1.getOperand(1) || + N0.getOperand(1) != N1.getOperand(0))) + return SDValue(); + if (!TLI.isOperationLegalOrCustom(Abd, VT)) + return SDValue(); + return DAG.getNode(Abd, DL, VT, N0.getOperand(0), N0.getOperand(1)); + }; + if (SDValue R = MatchSubMaxMin(ISD::SMAX, ISD::SMIN, ISD::ABDS)) + return R; + if (SDValue R = MatchSubMaxMin(ISD::UMAX, ISD::UMIN, ISD::ABDU)) + return R; + return SDValue(); } @@ -3996,8 +4047,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { // fold (mul x, -1) -> 0-x if (N1IsConst && ConstValue1.isAllOnes()) - return DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(0, DL, VT), N0); + return DAG.getNegative(N0, DL, VT); // fold (mul x, (1 << c)) -> x << c if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) && @@ -4021,6 +4071,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { getShiftAmountTy(N0.getValueType())))); } + // Attempt to reuse an existing umul_lohi/smul_lohi node, but only if the + // hi result is in use in case we hit this mid-legalization. + for (unsigned LoHiOpc : {ISD::UMUL_LOHI, ISD::SMUL_LOHI}) { + if (!LegalOperations || TLI.isOperationLegalOrCustom(LoHiOpc, VT)) { + SDVTList LoHiVT = DAG.getVTList(VT, VT); + // TODO: Can we match commutable operands with getNodeIfExists? + if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N0, N1})) + if (LoHi->hasAnyUseOfValue(1)) + return SDValue(LoHi, 0); + if (SDNode *LoHi = DAG.getNodeIfExists(LoHiOpc, LoHiVT, {N1, N0})) + if (LoHi->hasAnyUseOfValue(1)) + return SDValue(LoHi, 0); + } + } + // Try to transform: // (1) multiply-by-(power-of-2 +/- 1) into shift and add/sub. 
// mul x, (2^N + 1) --> add (shl x, N), x @@ -4064,7 +4129,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getConstant(TZeros, DL, VT))) : DAG.getNode(MathOp, DL, VT, Shl, N0); if (ConstValue1.isNegative()) - R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R); + R = DAG.getNegative(R, DL, VT); return R; } } @@ -4108,21 +4173,21 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // Fold (mul (vscale * C0), C1) to (vscale * (C0 * C1)). - if (N0.getOpcode() == ISD::VSCALE) - if (ConstantSDNode *NC1 = isConstOrConstSplat(N1)) { - const APInt &C0 = N0.getConstantOperandAPInt(0); - const APInt &C1 = NC1->getAPIntValue(); - return DAG.getVScale(DL, VT, C0 * C1); - } + ConstantSDNode *NC1 = isConstOrConstSplat(N1); + if (N0.getOpcode() == ISD::VSCALE && NC1) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + const APInt &C1 = NC1->getAPIntValue(); + return DAG.getVScale(DL, VT, C0 * C1); + } // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)). APInt MulVal; - if (N0.getOpcode() == ISD::STEP_VECTOR) - if (ISD::isConstantSplatVector(N1.getNode(), MulVal)) { - const APInt &C0 = N0.getConstantOperandAPInt(0); - APInt NewStep = C0 * MulVal; - return DAG.getStepVector(DL, VT, NewStep); - } + if (N0.getOpcode() == ISD::STEP_VECTOR && + ISD::isConstantSplatVector(N1.getNode(), MulVal)) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + APInt NewStep = C0 * MulVal; + return DAG.getStepVector(DL, VT, NewStep); + } // Fold ((mul x, 0/undef) -> 0, // (mul x, 1) -> x) -> x) @@ -4318,7 +4383,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // fold (sdiv X, -1) -> 0-X ConstantSDNode *N1C = isConstOrConstSplat(N1); if (N1C && N1C->isAllOnes()) - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0); + return DAG.getNegative(N0, DL, VT); // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0) if (N1C && N1C->getAPIntValue().isMinSignedValue()) @@ -4465,10 +4530,11 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { // fold (udiv X, -1) -> select(X == -1, 1, 0) ConstantSDNode *N1C = isConstOrConstSplat(N1); - if (N1C && N1C->isAllOnes()) + if (N1C && N1C->isAllOnes() && CCVT.isVector() == VT.isVector()) { return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ), DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT)); + } if (SDValue V = simplifyDivRem(N, DAG)) return V; @@ -4571,7 +4637,8 @@ SDValue DAGCombiner::visitREM(SDNode *N) { // fold (urem X, -1) -> select(FX == -1, 0, FX) // Freeze the numerator to avoid a miscompile with an undefined value. 
- if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false)) { + if (!isSigned && llvm::isAllOnesOrAllOnesSplat(N1, /*AllowUndefs*/ false) && + CCVT.isVector() == VT.isVector()) { SDValue F0 = DAG.getFreeze(N0); SDValue EqualsNeg1 = DAG.getSetCC(DL, CCVT, F0, N1, ISD::SETEQ); return DAG.getSelect(DL, VT, EqualsNeg1, DAG.getConstant(0, DL, VT), F0); @@ -5328,6 +5395,21 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) { return DAG.getNode(HandOpcode, DL, VT, Logic); } + // For funnel shifts FSHL/FSHR: + // logic_op (OP x, x1, s), (OP y, y1, s) --> + // --> OP (logic_op x, y), (logic_op, x1, y1), s + if ((HandOpcode == ISD::FSHL || HandOpcode == ISD::FSHR) && + N0.getOperand(2) == N1.getOperand(2)) { + if (!N0.hasOneUse() || !N1.hasOneUse()) + return SDValue(); + SDValue X1 = N0.getOperand(1); + SDValue Y1 = N1.getOperand(1); + SDValue S = N0.getOperand(2); + SDValue Logic0 = DAG.getNode(LogicOpcode, DL, VT, X, Y); + SDValue Logic1 = DAG.getNode(LogicOpcode, DL, VT, X1, Y1); + return DAG.getNode(HandOpcode, DL, VT, Logic0, Logic1, S); + } + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) // Only perform this optimization up until type legalization, before // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by @@ -6139,6 +6221,43 @@ static SDValue foldLogicOfShifts(SDNode *N, SDValue LogicOp, SDValue ShiftOp, return DAG.getNode(LogicOpcode, DL, VT, NewShift, Z); } +/// Given a tree of logic operations with shape like +/// (LOGIC (LOGIC (X, Y), LOGIC (Z, Y))) +/// try to match and fold shift operations with the same shift amount. +/// For example: +/// LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) --> +/// --> LOGIC (SH (LOGIC X0, X1), Y), (LOGIC Z, W) +static SDValue foldLogicTreeOfShifts(SDNode *N, SDValue LeftHand, + SDValue RightHand, SelectionDAG &DAG) { + unsigned LogicOpcode = N->getOpcode(); + assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR || + LogicOpcode == ISD::XOR)); + if (LeftHand.getOpcode() != LogicOpcode || + RightHand.getOpcode() != LogicOpcode) + return SDValue(); + if (!LeftHand.hasOneUse() || !RightHand.hasOneUse()) + return SDValue(); + + // Try to match one of following patterns: + // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC (SH X1, Y), W) + // LOGIC (LOGIC (SH X0, Y), Z), (LOGIC W, (SH X1, Y)) + // Note that foldLogicOfShifts will handle commuted versions of the left hand + // itself. 
+ SDValue CombinedShifts, W; + SDValue R0 = RightHand.getOperand(0); + SDValue R1 = RightHand.getOperand(1); + if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R0, DAG))) + W = R1; + else if ((CombinedShifts = foldLogicOfShifts(N, LeftHand, R1, DAG))) + W = R0; + else + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + return DAG.getNode(LogicOpcode, DL, VT, CombinedShifts, W); +} + SDValue DAGCombiner::visitAND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -6175,8 +6294,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (masked_load) (splat_vec (x, ...))) to zext_masked_load auto *MLoad = dyn_cast<MaskedLoadSDNode>(N0); ConstantSDNode *Splat = isConstOrConstSplat(N1, true, true); - if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && N0.hasOneUse() && - Splat && N1.hasOneUse()) { + if (MLoad && MLoad->getExtensionType() == ISD::EXTLOAD && Splat && + N1.hasOneUse()) { EVT LoadVT = MLoad->getMemoryVT(); EVT ExtVT = VT; if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT, LoadVT)) { @@ -6186,11 +6305,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) { uint64_t ElementSize = LoadVT.getVectorElementType().getScalarSizeInBits(); if (Splat->getAPIntValue().isMask(ElementSize)) { - return DAG.getMaskedLoad( + auto NewLoad = DAG.getMaskedLoad( ExtVT, SDLoc(N), MLoad->getChain(), MLoad->getBasePtr(), MLoad->getOffset(), MLoad->getMask(), MLoad->getPassThru(), LoadVT, MLoad->getMemOperand(), MLoad->getAddressingMode(), ISD::ZEXTLOAD, MLoad->isExpandingLoad()); + bool LoadHasOtherUsers = !N0.hasOneUse(); + CombineTo(N, NewLoad); + if (LoadHasOtherUsers) + CombineTo(MLoad, NewLoad.getValue(0), NewLoad.getValue(1)); + return SDValue(N, 0); } } } @@ -6213,14 +6337,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags())) return RAND; - // Try to convert a constant mask AND into a shuffle clear mask. - if (VT.isVector()) - if (SDValue Shuffle = XformToShuffleWithZero(N)) - return Shuffle; - - if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) - return Combined; - // fold (and (or x, C), D) -> D if (C & D) == D auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) { return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue()); @@ -6228,23 +6344,32 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (N0.getOpcode() == ISD::OR && ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset)) return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { SDValue N0Op0 = N0.getOperand(0); APInt Mask = ~N1C->getAPIntValue(); Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits()); - if (DAG.MaskedValueIsZero(N0Op0, Mask)) { - SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), - N0.getValueType(), N0Op0); - - // Replace uses of the AND with uses of the Zero extend node. - CombineTo(N, Zext); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); + } - // We actually want to replace all uses of the any_extend with the - // zero_extend, to avoid duplicating things. This will later cause this - // AND to be folded. - CombineTo(N0.getNode(), Zext); - return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ // fold (and (ext (and V, c1)), c2) -> (and (ext V), (and c1, (ext c2))) + if (ISD::isExtOpcode(N0.getOpcode())) { + unsigned ExtOpc = N0.getOpcode(); + SDValue N0Op0 = N0.getOperand(0); + if (N0Op0.getOpcode() == ISD::AND && + (ExtOpc != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0Op0, VT)) && + DAG.isConstantIntBuildVectorOrConstantInt(N1) && + DAG.isConstantIntBuildVectorOrConstantInt(N0Op0.getOperand(1)) && + N0->hasOneUse() && N0Op0->hasOneUse()) { + SDLoc DL(N); + SDValue NewMask = + DAG.getNode(ISD::AND, DL, VT, N1, + DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(1))); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(ExtOpc, DL, VT, N0Op0.getOperand(0)), + NewMask); } } @@ -6353,6 +6478,33 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } + // Try to convert a constant mask AND into a shuffle clear mask. + if (VT.isVector()) + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; + + if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N)) + return Combined; + + if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && N1C && + ISD::isExtOpcode(N0.getOperand(0).getOpcode())) { + SDValue Ext = N0.getOperand(0); + EVT ExtVT = Ext->getValueType(0); + SDValue Extendee = Ext->getOperand(0); + + unsigned ScalarWidth = Extendee.getValueType().getScalarSizeInBits(); + if (N1C->getAPIntValue().isMask(ScalarWidth) && + (!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, ExtVT))) { + // (and (extract_subvector (zext|anyext|sext v) _) iN_mask) + // => (extract_subvector (iN_zeroext v)) + SDValue ZeroExtExtendee = + DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), ExtVT, Extendee); + + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, ZeroExtExtendee, + N0.getOperand(1)); + } + } + // fold (and (masked_gather x)) -> (zext_masked_gather x) if (auto *GN0 = dyn_cast<MaskedGatherSDNode>(N0)) { EVT MemVT = GN0->getMemoryVT(); @@ -6493,6 +6645,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (SDValue V = foldAndToUsubsat(N, DAG)) return V; + // Postpone until legalization completed to avoid interference with bswap + // folding + if (LegalOperations || VT.isVector()) + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; + return SDValue(); } @@ -6892,6 +7050,10 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); + // fold or (and x, y), x --> x + if (N00 == N1 || N01 == N1) + return N1; + // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y) // TODO: Set AllowUndefs = true. 
if (getBitwiseNotOperand(N01, N00, @@ -6904,6 +7066,24 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1, return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); } + if (N0.getOpcode() == ISD::XOR) { + // fold or (xor x, y), x --> or x, y + // or (xor x, y), (x and/or y) --> or x, y + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + if (N00 == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N01, N1); + if (N01 == N1) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N1); + + if (N1.getOpcode() == ISD::AND || N1.getOpcode() == ISD::OR) { + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + if ((N00 == N10 && N01 == N11) || (N00 == N11 && N01 == N10)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N00, N01); + } + } + if (SDValue R = foldLogicOfShifts(N, N0, N1, DAG)) return R; @@ -7093,10 +7273,17 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDValue Combined = visitADDLike(N)) return Combined; + // Postpone until legalization completed to avoid interference with bswap + // folding + if (LegalOperations || VT.isVector()) + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; + return SDValue(); } -static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { +static SDValue stripConstantMask(const SelectionDAG &DAG, SDValue Op, + SDValue &Mask) { if (Op.getOpcode() == ISD::AND && DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { Mask = Op.getOperand(1); @@ -7106,7 +7293,7 @@ static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) { } /// Match "(X shl/srl V1) & V2" where V2 may not be present. -static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, +static bool matchRotateHalf(const SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask) { Op = stripConstantMask(DAG, Op, Mask); if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { @@ -7144,9 +7331,8 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift, SDValue ExtractFrom, SDValue &Mask, const SDLoc &DL) { assert(OppShift && ExtractFrom && "Empty SDValue"); - assert( - (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) && - "Existing shift must be valid as a rotate half"); + if (OppShift.getOpcode() != ISD::SHL && OppShift.getOpcode() != ISD::SRL) + return SDValue(); ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); @@ -7301,12 +7487,14 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, unsigned MaskLoBits = 0; if (IsRotate && isPowerOf2_64(EltSize)) { unsigned Bits = Log2_64(EltSize); - APInt DemandedBits = - APInt::getLowBitsSet(Neg.getScalarValueSizeInBits(), Bits); - if (SDValue Inner = - TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) { - Neg = Inner; - MaskLoBits = Bits; + unsigned NegBits = Neg.getScalarValueSizeInBits(); + if (NegBits >= Bits) { + APInt DemandedBits = APInt::getLowBitsSet(NegBits, Bits); + if (SDValue Inner = + TLI.SimplifyMultipleUseDemandedBits(Neg, DemandedBits, DAG)) { + Neg = Inner; + MaskLoBits = Bits; + } } } @@ -7322,11 +7510,13 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, // affect Mask's demanded bits, just replace Pos with Pos'. These operations // are redundant for the purpose of the equality. 
if (MaskLoBits) { - APInt DemandedBits = - APInt::getLowBitsSet(Pos.getScalarValueSizeInBits(), MaskLoBits); - if (SDValue Inner = - TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) { - Pos = Inner; + unsigned PosBits = Pos.getScalarValueSizeInBits(); + if (PosBits >= MaskLoBits) { + APInt DemandedBits = APInt::getLowBitsSet(PosBits, MaskLoBits); + if (SDValue Inner = + TLI.SimplifyMultipleUseDemandedBits(Pos, DemandedBits, DAG)) { + Pos = Inner; + } } } @@ -7551,6 +7741,10 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { std::swap(LHSMask, RHSMask); } + // Something has gone wrong - we've lost the shl/srl pair - bail. + if (LHSShift.getOpcode() != ISD::SHL || RHSShift.getOpcode() != ISD::SRL) + return SDValue(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); @@ -7586,7 +7780,7 @@ SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { }; // TODO: Support pre-legalization funnel-shift by constant. - bool IsRotate = LHSShift.getOperand(0) == RHSShift.getOperand(0); + bool IsRotate = LHSShiftArg == RHSShiftArg; if (!IsRotate && !(HasFSHL || HasFSHR)) { if (TLI.isTypeLegal(VT) && LHS.hasOneUse() && RHS.hasOneUse() && ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { @@ -7714,87 +7908,135 @@ struct ByteProvider { // ByteOffset is the offset of the byte in the value produced by the load. LoadSDNode *Load = nullptr; unsigned ByteOffset = 0; + unsigned VectorOffset = 0; ByteProvider() = default; - static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) { - return ByteProvider(Load, ByteOffset); + static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset, + unsigned VectorOffset) { + return ByteProvider(Load, ByteOffset, VectorOffset); } - static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); } + static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0, 0); } bool isConstantZero() const { return !Load; } bool isMemory() const { return Load; } bool operator==(const ByteProvider &Other) const { - return Other.Load == Load && Other.ByteOffset == ByteOffset; + return Other.Load == Load && Other.ByteOffset == ByteOffset && + Other.VectorOffset == VectorOffset; } private: - ByteProvider(LoadSDNode *Load, unsigned ByteOffset) - : Load(Load), ByteOffset(ByteOffset) {} + ByteProvider(LoadSDNode *Load, unsigned ByteOffset, unsigned VectorOffset) + : Load(Load), ByteOffset(ByteOffset), VectorOffset(VectorOffset) {} }; } // end anonymous namespace /// Recursively traverses the expression calculating the origin of the requested -/// byte of the given value. Returns None if the provider can't be calculated. +/// byte of the given value. Returns std::nullopt if the provider can't be +/// calculated. +/// +/// For all the values except the root of the expression, we verify that the +/// value has exactly one use and if not then return std::nullopt. This way if +/// the origin of the byte is returned it's guaranteed that the values which +/// contribute to the byte are not used outside of this expression. + +/// However, there is a special case when dealing with vector loads -- we allow +/// more than one use if the load is a vector type. 
Since the values that +/// contribute to the byte ultimately come from the ExtractVectorElements of the +/// Load, we don't care if the Load has uses other than ExtractVectorElements, +/// because those operations are independent from the pattern to be combined. +/// For vector loads, we simply care that the ByteProviders are adjacent +/// positions of the same vector, and their index matches the byte that is being +/// provided. This is captured by the \p VectorIndex algorithm. \p VectorIndex +/// is the index used in an ExtractVectorElement, and \p StartingIndex is the +/// byte position we are trying to provide for the LoadCombine. If these do +/// not match, then we can not combine the vector loads. \p Index uses the +/// byte position we are trying to provide for and is matched against the +/// shl and load size. The \p Index algorithm ensures the requested byte is +/// provided for by the pattern, and the pattern does not over provide bytes. /// -/// For all the values except the root of the expression verifies that the value -/// has exactly one use and if it's not true return None. This way if the origin -/// of the byte is returned it's guaranteed that the values which contribute to -/// the byte are not used outside of this expression. /// -/// Because the parts of the expression are not allowed to have more than one -/// use this function iterates over trees, not DAGs. So it never visits the same -/// node more than once. -static const Optional<ByteProvider> +/// The supported LoadCombine pattern for vector loads is as follows +/// or +/// / \ +/// or shl +/// / \ | +/// or shl zext +/// / \ | | +/// shl zext zext EVE* +/// | | | | +/// zext EVE* EVE* LOAD +/// | | | +/// EVE* LOAD LOAD +/// | +/// LOAD +/// +/// *ExtractVectorElement +static const std::optional<ByteProvider> calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, - bool Root = false) { + std::optional<uint64_t> VectorIndex, + unsigned StartingIndex = 0) { + // Typical i64 by i8 pattern requires recursion up to 8 calls depth if (Depth == 10) - return None; + return std::nullopt; + + // Only allow multiple uses if the instruction is a vector load (in which + // case we will use the load for every ExtractVectorElement) + if (Depth && !Op.hasOneUse() && + (Op.getOpcode() != ISD::LOAD || !Op.getValueType().isVector())) + return std::nullopt; - if (!Root && !Op.hasOneUse()) - return None; + // Fail to combine if we have encountered anything but a LOAD after handling + // an ExtractVectorElement. 
+ if (Op.getOpcode() != ISD::LOAD && VectorIndex.has_value()) + return std::nullopt; - assert(Op.getValueType().isScalarInteger() && "can't handle other types"); unsigned BitWidth = Op.getValueSizeInBits(); if (BitWidth % 8 != 0) - return None; + return std::nullopt; unsigned ByteWidth = BitWidth / 8; assert(Index < ByteWidth && "invalid index requested"); (void) ByteWidth; switch (Op.getOpcode()) { case ISD::OR: { - auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1); + auto LHS = + calculateByteProvider(Op->getOperand(0), Index, Depth + 1, VectorIndex); if (!LHS) - return None; - auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1); + return std::nullopt; + auto RHS = + calculateByteProvider(Op->getOperand(1), Index, Depth + 1, VectorIndex); if (!RHS) - return None; + return std::nullopt; if (LHS->isConstantZero()) return RHS; if (RHS->isConstantZero()) return LHS; - return None; + return std::nullopt; } case ISD::SHL: { auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1)); if (!ShiftOp) - return None; + return std::nullopt; uint64_t BitShift = ShiftOp->getZExtValue(); + if (BitShift % 8 != 0) - return None; + return std::nullopt; uint64_t ByteShift = BitShift / 8; + // If we are shifting by an amount greater than the index we are trying to + // provide, then do not provide anything. Otherwise, subtract the index by + // the amount we shifted by. return Index < ByteShift ? ByteProvider::getConstantZero() : calculateByteProvider(Op->getOperand(0), Index - ByteShift, - Depth + 1); + Depth + 1, VectorIndex, Index); } case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -7802,37 +8044,70 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth, SDValue NarrowOp = Op->getOperand(0); unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); if (NarrowBitWidth % 8 != 0) - return None; + return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; if (Index >= NarrowByteWidth) return Op.getOpcode() == ISD::ZERO_EXTEND - ? Optional<ByteProvider>(ByteProvider::getConstantZero()) - : None; - return calculateByteProvider(NarrowOp, Index, Depth + 1); + ? std::optional<ByteProvider>(ByteProvider::getConstantZero()) + : std::nullopt; + return calculateByteProvider(NarrowOp, Index, Depth + 1, VectorIndex, + StartingIndex); } case ISD::BSWAP: return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1, - Depth + 1); + Depth + 1, VectorIndex, StartingIndex); + case ISD::EXTRACT_VECTOR_ELT: { + auto OffsetOp = dyn_cast<ConstantSDNode>(Op->getOperand(1)); + if (!OffsetOp) + return std::nullopt; + + VectorIndex = OffsetOp->getZExtValue(); + + SDValue NarrowOp = Op->getOperand(0); + unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits(); + if (NarrowBitWidth % 8 != 0) + return std::nullopt; + uint64_t NarrowByteWidth = NarrowBitWidth / 8; + + // Check to see if the position of the element in the vector corresponds + // with the byte we are trying to provide for. In the case of a vector of + // i8, this simply means the VectorIndex == StartingIndex. For non i8 cases, + // the element will provide a range of bytes. For example, if we have a + // vector of i16s, each element provides two bytes (V[1] provides byte 2 and + // 3). 
+ if (*VectorIndex * NarrowByteWidth > StartingIndex) + return std::nullopt; + if ((*VectorIndex + 1) * NarrowByteWidth <= StartingIndex) + return std::nullopt; + + return calculateByteProvider(Op->getOperand(0), Index, Depth + 1, + VectorIndex, StartingIndex); + } case ISD::LOAD: { auto L = cast<LoadSDNode>(Op.getNode()); if (!L->isSimple() || L->isIndexed()) - return None; + return std::nullopt; unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits(); if (NarrowBitWidth % 8 != 0) - return None; + return std::nullopt; uint64_t NarrowByteWidth = NarrowBitWidth / 8; + // If the width of the load does not reach byte we are trying to provide for + // and it is not a ZEXTLOAD, then the load does not provide for the byte in + // question if (Index >= NarrowByteWidth) return L->getExtensionType() == ISD::ZEXTLOAD - ? Optional<ByteProvider>(ByteProvider::getConstantZero()) - : None; - return ByteProvider::getMemory(L, Index); + ? std::optional<ByteProvider>(ByteProvider::getConstantZero()) + : std::nullopt; + + unsigned BPVectorIndex = VectorIndex.value_or(0U); + return ByteProvider::getMemory(L, Index, BPVectorIndex); } } - return None; + return std::nullopt; } static unsigned littleEndianByteAt(unsigned BW, unsigned i) { @@ -7845,13 +8120,13 @@ static unsigned bigEndianByteAt(unsigned BW, unsigned i) { // Check if the bytes offsets we are looking at match with either big or // little endian value loaded. Return true for big endian, false for little -// endian, and None if match failed. -static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, - int64_t FirstOffset) { +// endian, and std::nullopt if match failed. +static std::optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, + int64_t FirstOffset) { // The endian can be decided only when it is 2 bytes at least. unsigned Width = ByteOffsets.size(); if (Width < 2) - return None; + return std::nullopt; bool BigEndian = true, LittleEndian = true; for (unsigned i = 0; i < Width; i++) { @@ -7859,7 +8134,7 @@ static Optional<bool> isBigEndian(const ArrayRef<int64_t> ByteOffsets, LittleEndian &= CurrentByteOffset == littleEndianByteAt(Width, i); BigEndian &= CurrentByteOffset == bigEndianByteAt(Width, i); if (!BigEndian && !LittleEndian) - return None; + return std::nullopt; } assert((BigEndian != LittleEndian) && "It should be either big endian or" @@ -7922,9 +8197,13 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { while (auto *Store = dyn_cast<StoreSDNode>(Chain)) { // All stores must be the same size to ensure that we are writing all of the // bytes in the wide value. + // This store should have exactly one use as a chain operand for another + // store in the merging set. If there are other chain uses, then the + // transform may not be safe because order of loads/stores outside of this + // set may not be preserved. // TODO: We could allow multiple sizes by tracking each stored byte. if (Store->getMemoryVT() != MemVT || !Store->isSimple() || - Store->isIndexed()) + Store->isIndexed() || !Store->hasOneUse()) return SDValue(); Stores.push_back(Store); Chain = Store->getChain(); @@ -7948,7 +8227,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { SmallVector<int64_t, 8> OffsetMap(NumStores, INT64_MAX); int64_t FirstOffset = INT64_MAX; StoreSDNode *FirstStore = nullptr; - Optional<BaseIndexOffset> Base; + std::optional<BaseIndexOffset> Base; for (auto *Store : Stores) { // All the stores store different parts of the CombinedValue. A truncate is // required to get the partial value. 
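For context on what the calculateByteProvider/MatchLoadCombine changes above are matching: the combine recognizes an OR of individually loaded, shifted bytes and replaces it with one wide load, inserting a BSWAP when the byte order is reversed relative to the target. Below is a standalone sketch of the source-level pattern and its folded form, in plain C++ rather than the DAGCombiner code itself; it assumes a little-endian host for the equivalence check.

// Standalone illustration (not DAGCombiner code) of the OR-of-shifted-byte-loads
// pattern and the single wide load it can be folded into.
#include <cassert>
#include <cstdint>
#include <cstring>

// Source pattern: four i8 loads, zero-extended, shifted, and OR'd together.
static uint32_t byte_or_form(const uint8_t *p) {
  return (uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
         ((uint32_t)p[3] << 24);
}

// Folded form: one 32-bit load (memcpy avoids alignment/aliasing UB). On a
// big-endian host the combine would instead emit a BSWAP of this load.
static uint32_t wide_load_form(const uint8_t *p) {
  uint32_t v;
  std::memcpy(&v, p, sizeof(v));
  return v;
}

int main() {
  const uint8_t bytes[4] = {0x11, 0x22, 0x33, 0x44};
  assert(byte_or_form(bytes) == 0x44332211u);
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  // The two forms agree only on a little-endian host; the fold is endian-aware.
  assert(wide_load_form(bytes) == byte_or_form(bytes));
#endif
  return 0;
}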
@@ -8016,7 +8295,7 @@ SDValue DAGCombiner::mergeTruncStores(StoreSDNode *N) { // Check that a store of the wide type is both allowed and fast on the target const DataLayout &Layout = DAG.getDataLayout(); - bool Fast = false; + unsigned Fast = 0; bool Allowed = TLI.allowsMemoryAccess(Context, Layout, WideVT, *FirstStore->getMemOperand(), &Fast); if (!Allowed || !Fast) @@ -8120,7 +8399,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian(); auto MemoryByteOffset = [&] (ByteProvider P) { assert(P.isMemory() && "Must be a memory byte provider"); - unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits(); + unsigned LoadBitWidth = P.Load->getMemoryVT().getScalarSizeInBits(); + assert(LoadBitWidth % 8 == 0 && "can only analyze providers for individual bytes not bit"); unsigned LoadByteWidth = LoadBitWidth / 8; @@ -8129,11 +8409,11 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { : littleEndianByteAt(LoadByteWidth, P.ByteOffset); }; - Optional<BaseIndexOffset> Base; + std::optional<BaseIndexOffset> Base; SDValue Chain; SmallPtrSet<LoadSDNode *, 8> Loads; - Optional<ByteProvider> FirstByteProvider; + std::optional<ByteProvider> FirstByteProvider; int64_t FirstOffset = INT64_MAX; // Check if all the bytes of the OR we are looking at are loaded from the same @@ -8141,7 +8421,9 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { SmallVector<int64_t, 8> ByteOffsets(ByteWidth); unsigned ZeroExtendedBytes = 0; for (int i = ByteWidth - 1; i >= 0; --i) { - auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true); + auto P = + calculateByteProvider(SDValue(N, 0), i, 0, /*VectorIndex*/ std::nullopt, + /*StartingIndex*/ i); if (!P) return SDValue(); @@ -8155,10 +8437,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { assert(P->isMemory() && "provenance should either be memory or zero"); LoadSDNode *L = P->Load; - assert(L->hasNUsesOfValue(1, 0) && L->isSimple() && - !L->isIndexed() && - "Must be enforced by calculateByteProvider"); - assert(L->getOffset().isUndef() && "Unindexed load must have undef offset"); // All loads must share the same chain SDValue LChain = L->getChain(); @@ -8170,8 +8448,25 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Loads must share the same base address BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG); int64_t ByteOffsetFromBase = 0; + + // For vector loads, the expected load combine pattern will have an + // ExtractElement for each index in the vector. While each of these + // ExtractElements will be accessing the same base address as determined + // by the load instruction, the actual bytes they interact with will differ + // due to different ExtractElement indices. To accurately determine the + // byte position of an ExtractElement, we offset the base load ptr with + // the index multiplied by the byte size of each element in the vector. 
+ if (L->getMemoryVT().isVector()) { + unsigned LoadWidthInBit = L->getMemoryVT().getScalarSizeInBits(); + if (LoadWidthInBit % 8 != 0) + return SDValue(); + unsigned ByteOffsetFromVector = P->VectorOffset * LoadWidthInBit / 8; + Ptr.addToOffset(ByteOffsetFromVector); + } + if (!Base) Base = Ptr; + else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase)) return SDValue(); @@ -8187,6 +8482,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { Loads.insert(L); } + assert(!Loads.empty() && "All the bytes of the value must be loaded from " "memory, so there must be at least one load which produces the value"); assert(Base && "Base address of the accessed memory location must be set"); @@ -8210,8 +8506,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { // Check if the bytes of the OR we are looking at match with either big or // little endian value load - Optional<bool> IsBigEndian = isBigEndian( - makeArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); + std::optional<bool> IsBigEndian = isBigEndian( + ArrayRef(ByteOffsets).drop_back(ZeroExtendedBytes), FirstOffset); if (!IsBigEndian) return SDValue(); @@ -8246,7 +8542,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) { return SDValue(); // Check that a load of the wide type is both allowed and fast on the target - bool Fast = false; + unsigned Fast = 0; bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT, *FirstLoad->getMemOperand(), &Fast); @@ -8419,6 +8715,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags())) return RXOR; + // fold (a^b) -> (a|b) iff a and b share no bits. + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, DL, VT, N0, N1); + // look for 'add-like' folds: // XOR(N0,MIN_SIGNED_VALUE) == ADD(N0,MIN_SIGNED_VALUE) if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) && @@ -8510,8 +8811,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { // fold (not (add X, -1)) -> (neg X) if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::ADD && isAllOnesOrAllOnesSplat(N0.getOperand(1))) { - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), - N0.getOperand(0)); + return DAG.getNegative(N0.getOperand(0), DL, VT); } // fold (xor (and x, y), y) -> (and (not x), y) @@ -8573,6 +8873,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return R; if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG)) return R; + if (SDValue R = foldLogicTreeOfShifts(N, N0, N1, DAG)) + return R; // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable if (SDValue MM = unfoldMaskedMerge(N)) @@ -8672,13 +8974,9 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level)) return SDValue(); - // TODO: This is limited to early combining because it may reveal regressions - // otherwise. But since we just checked a target hook to see if this is - // desirable, that should have filtered out cases where this interferes - // with some other pattern matching. - if (!LegalTypes) - if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) - return R; + // Fold shift(bitop(shift(x,c1),y), c2) -> bitop(shift(x,c1+c2),shift(y,c2)). + if (SDValue R = combineShiftOfShiftedLogic(N, DAG)) + return R; // We want to pull some binops through shifts, so that we have (and (shift)) // instead of (shift (and)), likewise for add, or, xor, etc. 
This sort of @@ -8697,11 +8995,6 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { break; } - // We require the RHS of the binop to be a constant and not opaque as well. - ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1)); - if (!BinOpCst) - return SDValue(); - // FIXME: disable this unless the input to the binop is a shift by a constant // or is copy/select. Enable this in other cases when figure out it's exactly // profitable. @@ -8719,16 +9012,17 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N) { if (IsCopyOrSelect && N->hasOneUse()) return SDValue(); - // Fold the constants, shifting the binop RHS by the shift amount. + // Attempt to fold the constants, shifting the binop RHS by the shift amount. SDLoc DL(N); EVT VT = N->getValueType(0); - SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1), - N->getOperand(1)); - assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); + if (SDValue NewRHS = DAG.FoldConstantArithmetic( + N->getOpcode(), DL, VT, {LHS.getOperand(1), N->getOperand(1)})) { + SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), + N->getOperand(1)); + return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); + } - SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0), - N->getOperand(1)); - return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS); + return SDValue(); } SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { @@ -8806,7 +9100,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { unsigned NextOp = N0.getOpcode(); // fold (rot* (rot* x, c2), c1) - // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize)) % bitsize) + // -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize) if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) { SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1); SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)); @@ -8822,6 +9116,8 @@ SDValue DAGCombiner::visitRotate(SDNode *N) { if (Norm1 && Norm2) if (SDValue CombinedShift = DAG.FoldConstantArithmetic( CombineOp, dl, ShiftVT, {Norm1, Norm2})) { + CombinedShift = DAG.FoldConstantArithmetic(ISD::ADD, dl, ShiftVT, + {CombinedShift, BitsizeC}); SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic( ISD::UREM, dl, ShiftVT, {CombinedShift, BitsizeC}); return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0), @@ -9086,23 +9382,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { return NewSHL; // Fold (shl (vscale * C0), C1) to (vscale * (C0 << C1)). - if (N0.getOpcode() == ISD::VSCALE) - if (ConstantSDNode *NC1 = isConstOrConstSplat(N->getOperand(1))) { - const APInt &C0 = N0.getConstantOperandAPInt(0); - const APInt &C1 = NC1->getAPIntValue(); - return DAG.getVScale(SDLoc(N), VT, C0 << C1); - } + if (N0.getOpcode() == ISD::VSCALE && N1C) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + const APInt &C1 = N1C->getAPIntValue(); + return DAG.getVScale(SDLoc(N), VT, C0 << C1); + } // Fold (shl step_vector(C0), C1) to (step_vector(C0 << C1)). 
APInt ShlVal; - if (N0.getOpcode() == ISD::STEP_VECTOR) - if (ISD::isConstantSplatVector(N1.getNode(), ShlVal)) { - const APInt &C0 = N0.getConstantOperandAPInt(0); - if (ShlVal.ult(C0.getBitWidth())) { - APInt NewStep = C0 << ShlVal; - return DAG.getStepVector(SDLoc(N), VT, NewStep); - } + if (N0.getOpcode() == ISD::STEP_VECTOR && + ISD::isConstantSplatVector(N1.getNode(), ShlVal)) { + const APInt &C0 = N0.getConstantOperandAPInt(0); + if (ShlVal.ult(C0.getBitWidth())) { + APInt NewStep = C0 << ShlVal; + return DAG.getStepVector(SDLoc(N), VT, NewStep); } + } return SDValue(); } @@ -9142,6 +9437,28 @@ static SDValue combineShiftToMULH(SDNode *N, SelectionDAG &DAG, EVT NarrowVT = LeftOp.getOperand(0).getValueType(); unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits(); + // return true if U may use the lower bits of its operands + auto UserOfLowerBits = [NarrowVTSize](SDNode *U) { + if (U->getOpcode() != ISD::SRL && U->getOpcode() != ISD::SRA) { + return true; + } + ConstantSDNode *UShiftAmtSrc = isConstOrConstSplat(U->getOperand(1)); + if (!UShiftAmtSrc) { + return true; + } + unsigned UShiftAmt = UShiftAmtSrc->getZExtValue(); + return UShiftAmt < NarrowVTSize; + }; + + // If the lower part of the MUL is also used and MUL_LOHI is supported + // do not introduce the MULH in favor of MUL_LOHI + unsigned MulLoHiOp = IsSignExt ? ISD::SMUL_LOHI : ISD::UMUL_LOHI; + if (!ShiftOperand.hasOneUse() && + TLI.isOperationLegalOrCustom(MulLoHiOp, NarrowVT) && + llvm::any_of(ShiftOperand->uses(), UserOfLowerBits)) { + return SDValue(); + } + SDValue MulhRightOp; if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) { unsigned ActiveBits = IsSignExt @@ -9649,16 +9966,23 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // However when after the source operand of SRL is optimized into AND, the SRL // itself may not be optimized further. Look for it and add the BRCOND into // the worklist. + // + // The also tends to happen for binary operations when SimplifyDemandedBits + // is involved. + // + // FIXME: This is unecessary if we process the DAG in topological order, + // which we plan to do. This workaround can be removed once the DAG is + // processed in topological order. if (N->hasOneUse()) { SDNode *Use = *N->use_begin(); - if (Use->getOpcode() == ISD::BRCOND) - AddToWorklist(Use); - else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { - // Also look pass the truncate. + + // Look pass the truncate. + if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) Use = *Use->use_begin(); - if (Use->getOpcode() == ISD::BRCOND) - AddToWorklist(Use); - } + + if (Use->getOpcode() == ISD::BRCOND || Use->getOpcode() == ISD::AND || + Use->getOpcode() == ISD::OR || Use->getOpcode() == ISD::XOR) + AddToWorklist(Use); } // Try to transform this shift into a multiply-high if @@ -9734,7 +10058,7 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) { uint64_t PtrOff = IsFSHL ? (((BitWidth - ShAmt) % BitWidth) / 8) : (ShAmt / 8); Align NewAlign = commonAlignment(RHS->getAlign(), PtrOff); - bool Fast = false; + unsigned Fast = 0; if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, RHS->getAddressSpace(), NewAlign, RHS->getMemOperand()->getFlags(), &Fast) && @@ -9817,8 +10141,8 @@ SDValue DAGCombiner::visitSHLSAT(SDNode *N) { // Given a ABS node, detect the following pattern: // (ABS (SUB (EXTEND a), (EXTEND b))). // Generates UABD/SABD instruction. 
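For context on the MULH profitability change above: the pattern being matched is a widened multiply whose result is shifted right by the narrow bit width, which is exactly the high half of the product, while the low half is the ordinary narrow multiply; that is why MUL_LOHI is preferred when the low bits also have users. A standalone sketch of the underlying arithmetic (illustrative, not LLVM code):

#include <cassert>
#include <cstdint>

// Shifting the widened product right by the narrow width yields the high
// half (MULHS); the low half is the plain narrow multiply.
int main() {
  int32_t a = -123456789, b = 987654321;
  int64_t wide = (int64_t)a * (int64_t)b;        // mul (sext a), (sext b)
  int32_t hi = (int32_t)(wide >> 32);            // sra by 32 -> MULHS a, b
  int32_t lo = (int32_t)wide;                    // ordinary a * b (wrapping)
  assert(lo == (int32_t)((uint32_t)a * (uint32_t)b));
  assert((int64_t)hi * ((int64_t)1 << 32) + (uint32_t)lo == wide);
  return 0;
}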
-static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, - const TargetLowering &TLI) { +SDValue DAGCombiner::foldABSToABD(SDNode *N) { + EVT VT = N->getValueType(0); SDValue AbsOp1 = N->getOperand(0); SDValue Op0, Op1; @@ -9831,10 +10155,14 @@ static SDValue combineABSToABD(SDNode *N, SelectionDAG &DAG, unsigned Opc0 = Op0.getOpcode(); // Check if the operands of the sub are (zero|sign)-extended. if (Opc0 != Op1.getOpcode() || - (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) + (Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND)) { + // fold (abs (sub nsw x, y)) -> abds(x, y) + if (AbsOp1->getFlags().hasNoSignedWrap() && + TLI.isOperationLegalOrCustom(ISD::ABDS, VT)) + return DAG.getNode(ISD::ABDS, SDLoc(N), VT, Op0, Op1); return SDValue(); + } - EVT VT = N->getValueType(0); EVT VT1 = Op0.getOperand(0).getValueType(); EVT VT2 = Op1.getOperand(0).getValueType(); unsigned ABDOpcode = (Opc0 == ISD::SIGN_EXTEND) ? ISD::ABDS : ISD::ABDU; @@ -9871,9 +10199,24 @@ SDValue DAGCombiner::visitABS(SDNode *N) { if (DAG.SignBitIsZero(N0)) return N0; - if (SDValue ABD = combineABSToABD(N, DAG, TLI)) + if (SDValue ABD = foldABSToABD(N)) return ABD; + // fold (abs (sign_extend_inreg x)) -> (zero_extend (abs (truncate x))) + // iff zero_extend/truncate are free. + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { + EVT ExtVT = cast<VTSDNode>(N0.getOperand(1))->getVT(); + if (TLI.isTruncateFree(VT, ExtVT) && TLI.isZExtFree(ExtVT, VT) && + TLI.isTypeDesirableForOp(ISD::ABS, ExtVT) && + hasOperation(ISD::ABS, ExtVT)) { + SDLoc DL(N); + return DAG.getNode( + ISD::ZERO_EXTEND, DL, VT, + DAG.getNode(ISD::ABS, DL, ExtVT, + DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N0.getOperand(0)))); + } + } + return SDValue(); } @@ -10027,14 +10370,11 @@ static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS); } -/// Generate Min/Max node -static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, - SDValue RHS, SDValue True, SDValue False, - ISD::CondCode CC, const TargetLowering &TLI, - SelectionDAG &DAG) { - if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) - return SDValue(); - +static SDValue combineMinNumMaxNumImpl(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, SDValue False, + ISD::CondCode CC, + const TargetLowering &TLI, + SelectionDAG &DAG) { EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); switch (CC) { case ISD::SETOLT: @@ -10075,6 +10415,46 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, } } +/// Generate Min/Max node +SDValue DAGCombiner::combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS, + SDValue RHS, SDValue True, + SDValue False, ISD::CondCode CC) { + if ((LHS == True && RHS == False) || (LHS == False && RHS == True)) + return combineMinNumMaxNumImpl(DL, VT, LHS, RHS, True, False, CC, TLI, DAG); + + // If we can't directly match this, try to see if we can pull an fneg out of + // the select. + SDValue NegTrue = TLI.getCheaperOrNeutralNegatedExpression( + True, DAG, LegalOperations, ForCodeSize); + if (!NegTrue) + return SDValue(); + + HandleSDNode NegTrueHandle(NegTrue); + + // Try to unfold an fneg from the select if we are comparing the negated + // constant. + // + // select (setcc x, K) (fneg x), -K -> fneg(minnum(x, K)) + // + // TODO: Handle fabs + if (LHS == NegTrue) { + // If we can't directly match this, try to see if we can pull an fneg out of + // the select. 
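The new branch in foldABSToABD keys off the no-signed-wrap flag: when the subtraction cannot overflow, abs(x - y) is exactly the signed absolute difference that ABDS computes. A standalone check of that identity (illustrative only):

#include <algorithm>
#include <cassert>
#include <cstdlib>

// When x - y cannot wrap (the nsw condition), abs(x - y) equals the signed
// absolute difference max(x, y) - min(x, y) that ABDS computes.
int main() {
  for (int x = -100; x <= 100; ++x)
    for (int y = -100; y <= 100; ++y)
      assert(std::abs(x - y) == std::max(x, y) - std::min(x, y));
  return 0;
}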
+ SDValue NegRHS = TLI.getCheaperOrNeutralNegatedExpression( + RHS, DAG, LegalOperations, ForCodeSize); + if (NegRHS) { + HandleSDNode NegRHSHandle(NegRHS); + if (NegRHS == False) { + SDValue Combined = combineMinNumMaxNumImpl(DL, VT, LHS, RHS, NegTrue, + False, CC, TLI, DAG); + return DAG.getNode(ISD::FNEG, DL, VT, Combined); + } + } + } + + return SDValue(); +} + /// If a (v)select has a condition value that is a sign-bit test, try to smear /// the condition operand sign-bit across the value width and use it as a mask. static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { @@ -10112,6 +10492,25 @@ static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +static bool shouldConvertSelectOfConstantsToMath(const SDValue &Cond, EVT VT, + const TargetLowering &TLI) { + if (!TLI.convertSelectOfConstantsToMath(VT)) + return false; + + if (Cond.getOpcode() != ISD::SETCC || !Cond->hasOneUse()) + return true; + if (!TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) + return true; + + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + if (CC == ISD::SETLT && isNullOrNullSplat(Cond.getOperand(1))) + return true; + if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(Cond.getOperand(1))) + return true; + + return false; +} + SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { SDValue Cond = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -10128,96 +10527,106 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) { if (!C1 || !C2) return SDValue(); + if (CondVT != MVT::i1 || LegalOperations) { + // fold (select Cond, 0, 1) -> (xor Cond, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. + if (CondVT.isInteger() && + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == + TargetLowering::ZeroOrOneBooleanContent && + TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == + TargetLowering::ZeroOrOneBooleanContent && + C1->isZero() && C2->isOne()) { + SDValue NotCond = + DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); + if (VT.bitsEq(CondVT)) + return NotCond; + return DAG.getZExtOrTrunc(NotCond, DL, VT); + } + + return SDValue(); + } + // Only do this before legalization to avoid conflicting with target-specific // transforms in the other direction (create a select from a zext/sext). There // is also a target-independent combine here in DAGCombiner in the other // direction for (select Cond, -1, 0) when the condition is not i1. 
- if (CondVT == MVT::i1 && !LegalOperations) { - if (C1->isZero() && C2->isOne()) { - // select Cond, 0, 1 --> zext (!Cond) - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - if (VT != MVT::i1) - NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond); - return NotCond; - } - if (C1->isZero() && C2->isAllOnes()) { - // select Cond, 0, -1 --> sext (!Cond) - SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); - if (VT != MVT::i1) - NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond); - return NotCond; - } - if (C1->isOne() && C2->isZero()) { - // select Cond, 1, 0 --> zext (Cond) - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - return Cond; - } - if (C1->isAllOnes() && C2->isZero()) { - // select Cond, -1, 0 --> sext (Cond) - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); - return Cond; - } - - // Use a target hook because some targets may prefer to transform in the - // other direction. - if (TLI.convertSelectOfConstantsToMath(VT)) { - // For any constants that differ by 1, we can transform the select into an - // extend and add. - const APInt &C1Val = C1->getAPIntValue(); - const APInt &C2Val = C2->getAPIntValue(); - if (C1Val - 1 == C2Val) { - // select Cond, C1, C1-1 --> add (zext Cond), C1-1 - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } - if (C1Val + 1 == C2Val) { - // select Cond, C1, C1+1 --> add (sext Cond), C1+1 - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); - return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); - } + assert(CondVT == MVT::i1 && !LegalOperations); - // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) - if (C1Val.isPowerOf2() && C2Val.isZero()) { - if (VT != MVT::i1) - Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); - SDValue ShAmtC = - DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); - return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); - } + // select Cond, 1, 0 --> zext (Cond) + if (C1->isOne() && C2->isZero()) + return DAG.getZExtOrTrunc(Cond, DL, VT); - if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) - return V; - } + // select Cond, -1, 0 --> sext (Cond) + if (C1->isAllOnes() && C2->isZero()) + return DAG.getSExtOrTrunc(Cond, DL, VT); + + // select Cond, 0, 1 --> zext (!Cond) + if (C1->isZero() && C2->isOne()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getZExtOrTrunc(NotCond, DL, VT); + return NotCond; + } + // select Cond, 0, -1 --> sext (!Cond) + if (C1->isZero() && C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); + return NotCond; + } + + // Use a target hook because some targets may prefer to transform in the + // other direction. + if (!shouldConvertSelectOfConstantsToMath(Cond, VT, TLI)) return SDValue(); + + // For any constants that differ by 1, we can transform the select into + // an extend and add. + const APInt &C1Val = C1->getAPIntValue(); + const APInt &C2Val = C2->getAPIntValue(); + + // select Cond, C1, C1-1 --> add (zext Cond), C1-1 + if (C1Val - 1 == C2Val) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); } - // fold (select Cond, 0, 1) -> (xor Cond, 1) - // We can't do this reliably if integer based booleans have different contents - // to floating point based booleans. 
This is because we can't tell whether we - // have an integer-based boolean or a floating-point-based boolean unless we - // can find the SETCC that produced it and inspect its operands. This is - // fairly easy if C is the SETCC node, but it can potentially be - // undiscoverable (or not reasonably discoverable). For example, it could be - // in another basic block or it could require searching a complicated - // expression. - if (CondVT.isInteger() && - TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) == - TargetLowering::ZeroOrOneBooleanContent && - TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) == - TargetLowering::ZeroOrOneBooleanContent && - C1->isZero() && C2->isOne()) { - SDValue NotCond = - DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT)); - if (VT.bitsEq(CondVT)) - return NotCond; - return DAG.getZExtOrTrunc(NotCond, DL, VT); + // select Cond, C1, C1+1 --> add (sext Cond), C1+1 + if (C1Val + 1 == C2Val) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::ADD, DL, VT, Cond, N2); + } + + // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2) + if (C1Val.isPowerOf2() && C2Val.isZero()) { + Cond = DAG.getZExtOrTrunc(Cond, DL, VT); + SDValue ShAmtC = + DAG.getShiftAmountConstant(C1Val.exactLogBase2(), VT, DL); + return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC); + } + + // select Cond, -1, C --> or (sext Cond), C + if (C1->isAllOnes()) { + Cond = DAG.getSExtOrTrunc(Cond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, Cond, N2); + } + + // select Cond, C, -1 --> or (sext (not Cond)), C + if (C2->isAllOnes()) { + SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1); + NotCond = DAG.getSExtOrTrunc(NotCond, DL, VT); + return DAG.getNode(ISD::OR, DL, VT, NotCond, N1); } + if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG)) + return V; + return SDValue(); } @@ -10325,10 +10734,17 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (SDValue V = DAG.simplifySelect(N0, N1, N2)) return V; - if (SDValue V = foldSelectOfConstants(N)) + if (SDValue V = foldBoolSelectToLogic(N, DAG)) return V; - if (SDValue V = foldBoolSelectToLogic(N, DAG)) + // select (not Cond), N1, N2 -> select Cond, N2, N1 + if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { + SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); + SelectOp->setFlags(Flags); + return SelectOp; + } + + if (SDValue V = foldSelectOfConstants(N)) return V; // If we can fold this based on the true/false value, do so. @@ -10413,13 +10829,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { } } - // select (not Cond), N1, N2 -> select Cond, N2, N1 - if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) { - SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1); - SelectOp->setFlags(Flags); - return SelectOp; - } - // Fold selects based on a setcc into other things, such as min/max/abs. if (N0.getOpcode() == ISD::SETCC) { SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1); @@ -10430,8 +10839,8 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // // This is OK if we don't care what happens if either operand is a NaN. if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI)) - if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, - CC, TLI, DAG)) + if (SDValue FMinMax = + combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2, CC)) return FMinMax; // Use 'unsigned add with overflow' to optimize an unsigned saturating add. 
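The rewritten foldSelectOfConstants adds three more arithmetic forms: a power-of-two select becomes a shift of the zero-extended condition, and an all-ones arm becomes an OR with the sign-extended (possibly inverted) condition. A standalone sketch, modelling an i1 condition as 0/1 and its sign extension as 0/-1 (illustrative only, not LLVM code):

#include <cassert>
#include <cstdint>

// An i1 condition modelled as 0/1; its sign extension is 0 or -1.
int main() {
  const int32_t C = 0x1234;
  const int32_t Pow2 = 64;                      // log2(Pow2) == 6
  for (int b = 0; b <= 1; ++b) {
    const bool Cond = (b != 0);
    const int32_t ZExt = Cond ? 1 : 0;
    const int32_t SExt = Cond ? -1 : 0;
    const int32_t NotSExt = Cond ? 0 : -1;      // sext(!Cond)
    assert((Cond ? Pow2 : 0) == (ZExt << 6));   // select Cond, Pow2, 0
    assert((Cond ? -1 : C) == (SExt | C));      // select Cond, -1, C
    assert((Cond ? C : -1) == (NotSExt | C));   // select Cond, C, -1
  }
  return 0;
}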
@@ -10542,23 +10951,37 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { } bool refineUniformBase(SDValue &BasePtr, SDValue &Index, bool IndexIsScaled, - SelectionDAG &DAG) { - if (!isNullConstant(BasePtr) || Index.getOpcode() != ISD::ADD) + SelectionDAG &DAG, const SDLoc &DL) { + if (Index.getOpcode() != ISD::ADD) return false; // Only perform the transformation when existing operands can be reused. if (IndexIsScaled) return false; - // For now we check only the LHS of the add. - SDValue LHS = Index.getOperand(0); - SDValue SplatVal = DAG.getSplatValue(LHS); - if (!SplatVal || SplatVal.getValueType() != BasePtr.getValueType()) + if (!isNullConstant(BasePtr) && !Index.hasOneUse()) return false; - BasePtr = SplatVal; - Index = Index.getOperand(1); - return true; + EVT VT = BasePtr.getValueType(); + if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(0)); + SplatVal && SplatVal.getValueType() == VT) { + if (isNullConstant(BasePtr)) + BasePtr = SplatVal; + else + BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); + Index = Index.getOperand(1); + return true; + } + if (SDValue SplatVal = DAG.getSplatValue(Index.getOperand(1)); + SplatVal && SplatVal.getValueType() == VT) { + if (isNullConstant(BasePtr)) + BasePtr = SplatVal; + else + BasePtr = DAG.getNode(ISD::ADD, DL, VT, BasePtr, SplatVal); + Index = Index.getOperand(0); + return true; + } + return false; } // Fold sext/zext of index into index type. @@ -10593,6 +11016,37 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, return false; } +SDValue DAGCombiner::visitVPSCATTER(SDNode *N) { + VPScatterSDNode *MSC = cast<VPScatterSDNode>(N); + SDValue Mask = MSC->getMask(); + SDValue Chain = MSC->getChain(); + SDValue Index = MSC->getIndex(); + SDValue Scale = MSC->getScale(); + SDValue StoreVal = MSC->getValue(); + SDValue BasePtr = MSC->getBasePtr(); + SDValue VL = MSC->getVectorLength(); + ISD::MemIndexType IndexType = MSC->getIndexType(); + SDLoc DL(N); + + // Zap scatters with a zero mask. + if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) + return Chain; + + if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) { + SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL}; + return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), + DL, Ops, MSC->getMemOperand(), IndexType); + } + + if (refineIndexType(Index, IndexType, StoreVal.getValueType(), DAG)) { + SDValue Ops[] = {Chain, StoreVal, BasePtr, Index, Scale, Mask, VL}; + return DAG.getScatterVP(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), + DL, Ops, MSC->getMemOperand(), IndexType); + } + + return SDValue(); +} + SDValue DAGCombiner::visitMSCATTER(SDNode *N) { MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); SDValue Mask = MSC->getMask(); @@ -10608,7 +11062,7 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) { if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return Chain; - if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG)) { + if (refineUniformBase(BasePtr, Index, MSC->isIndexScaled(), DAG, DL)) { SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, Scale}; return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), MSC->getMemoryVT(), DL, Ops, MSC->getMemOperand(), IndexType, @@ -10674,8 +11128,9 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { // If this is a TRUNC followed by a masked store, fold this into a masked // truncating store. We can do this even if this is already a masked // truncstore. 
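refineUniformBase now also handles a non-null base pointer: a splatted term in the vector index can be folded into the scalar base, since base[splat + idx[i]] and (base + splat)[idx[i]] address the same memory. A standalone pointer-arithmetic sketch of that equivalence (illustrative; the array and index values are made up):

#include <cassert>
#include <cstddef>

// A gather whose vector index is (splat(S) + Idx) reads the same memory as a
// gather from base (Base + S) with index Idx, so the splatted term can move
// into the scalar base pointer.
int main() {
  int Data[64];
  for (int i = 0; i < 64; ++i)
    Data[i] = 3 * i;

  int *Base = Data;
  const std::size_t S = 5;                      // splatted part of the index
  const std::size_t Idx[4] = {0, 7, 11, 20};    // per-lane indices

  for (std::size_t Lane = 0; Lane < 4; ++Lane)
    assert(Base[S + Idx[Lane]] == (Base + S)[Idx[Lane]]);
  return 0;
}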
+ // TODO: Try combine to masked compress store if possiable. if ((Value.getOpcode() == ISD::TRUNCATE) && Value->hasOneUse() && - MST->isUnindexed() && + MST->isUnindexed() && !MST->isCompressingStore() && TLI.canCombineTruncStore(Value.getOperand(0).getValueType(), MST->getMemoryVT(), LegalOperations)) { auto Mask = TLI.promoteTargetBoolean(DAG, MST->getMask(), @@ -10689,6 +11144,34 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitVPGATHER(SDNode *N) { + VPGatherSDNode *MGT = cast<VPGatherSDNode>(N); + SDValue Mask = MGT->getMask(); + SDValue Chain = MGT->getChain(); + SDValue Index = MGT->getIndex(); + SDValue Scale = MGT->getScale(); + SDValue BasePtr = MGT->getBasePtr(); + SDValue VL = MGT->getVectorLength(); + ISD::MemIndexType IndexType = MGT->getIndexType(); + SDLoc DL(N); + + if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) { + SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL}; + return DAG.getGatherVP( + DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, + Ops, MGT->getMemOperand(), IndexType); + } + + if (refineIndexType(Index, IndexType, N->getValueType(0), DAG)) { + SDValue Ops[] = {Chain, BasePtr, Index, Scale, Mask, VL}; + return DAG.getGatherVP( + DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, + Ops, MGT->getMemOperand(), IndexType); + } + + return SDValue(); +} + SDValue DAGCombiner::visitMGATHER(SDNode *N) { MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N); SDValue Mask = MGT->getMask(); @@ -10704,7 +11187,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) { if (ISD::isConstantSplatVectorAllZeros(Mask.getNode())) return CombineTo(N, PassThru, MGT->getChain()); - if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG)) { + if (refineUniformBase(BasePtr, Index, MGT->isIndexScaled(), DAG, DL)) { SDValue Ops[] = {Chain, PassThru, Mask, BasePtr, Index, Scale}; return DAG.getMaskedGather( DAG.getVTList(N->getValueType(0), MVT::Other), MGT->getMemoryVT(), DL, @@ -10756,7 +11239,7 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { SDValue N2 = N->getOperand(2); EVT VT = N->getValueType(0); if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 || - !TLI.convertSelectOfConstantsToMath(VT) || + !shouldConvertSelectOfConstantsToMath(Cond, VT, TLI) || !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) || !ISD::isBuildVectorOfConstantSDNodes(N2.getNode())) return SDValue(); @@ -10869,8 +11352,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { // NaN. 
// if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) { - if (SDValue FMinMax = - combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG)) + if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC)) return FMinMax; } @@ -11011,8 +11493,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (SatCC == ISD::SETUGT && Other.getOpcode() == ISD::ADD && ISD::matchBinaryPredicate(OpRHS, CondRHS, MatchUSUBSAT, /*AllowUndefs*/ true)) { - OpRHS = DAG.getNode(ISD::SUB, DL, VT, - DAG.getConstant(0, DL, VT), OpRHS); + OpRHS = DAG.getNegative(OpRHS, DL, VT); return DAG.getNode(ISD::USUBSAT, DL, VT, OpLHS, OpRHS); } @@ -11083,6 +11564,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { if (N2 == N3) return N2; + // select_cc bool, 0, x, y, seteq -> select bool, y, x + if (CC == ISD::SETEQ && !LegalTypes && N0.getValueType() == MVT::i1 && + isNullConstant(N1)) + return DAG.getSelect(SDLoc(N), N2.getValueType(), N0, N3, N2); + // Determine if the condition we're dealing with is constant if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1, CC, SDLoc(N), false)) { @@ -11297,9 +11783,11 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SDLoc DL(N); assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || - Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || - Opcode == ISD::ZERO_EXTEND_VECTOR_INREG) - && "Expected EXTEND dag node in input!"); + Opcode == ISD::ANY_EXTEND || + Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || + Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || + Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && + "Expected EXTEND dag node in input!"); // fold (sext c1) -> c1 // fold (zext c1) -> c1 @@ -11347,15 +11835,13 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, SmallVector<SDValue, 8> Elts; unsigned NumElts = VT.getVectorNumElements(); - // For zero-extensions, UNDEF elements still guarantee to have the upper - // bits set to zero. - bool IsZext = - Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG; - for (unsigned i = 0; i != NumElts; ++i) { SDValue Op = N0.getOperand(i); if (Op.isUndef()) { - Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT)); + if (Opcode == ISD::ANY_EXTEND || Opcode == ISD::ANY_EXTEND_VECTOR_INREG) + Elts.push_back(DAG.getUNDEF(SVT)); + else + Elts.push_back(DAG.getConstant(0, DL, SVT)); continue; } @@ -11926,7 +12412,7 @@ SDValue DAGCombiner::foldSextSetcc(SDNode *N) { if (SDValue SCC = SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true)) return SCC; - if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) { + if (!VT.isVector() && !shouldConvertSelectOfConstantsToMath(N0, VT, TLI)) { EVT SetCCVT = getSetCCResultType(N00VT); // Don't do this transform for i1 because there's a select transform // that would reverse it. @@ -11947,6 +12433,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT VT = N->getValueType(0); SDLoc DL(N); + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVCastOp(N, DL)) + return FoldedVOp; + // sext(undef) = 0 because the top bit will all be the same. 
if (N0.isUndef()) return DAG.getConstant(0, DL, VT); @@ -11959,6 +12449,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0)); + // fold (sext (sext_inreg x)) -> (sext (trunc x)) + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) { + SDValue N00 = N0.getOperand(0); + EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT(); + if (N00.getOpcode() == ISD::TRUNCATE && (!LegalOperations || TLI.isTypeLegal(ExtVT))) { + SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T); + } + } + if (N0.getOpcode() == ISD::TRUNCATE) { // fold (sext (truncate (load x))) -> (sext (smaller load x)) // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) @@ -12095,7 +12595,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND && TLI.isOperationLegalOrCustom(ISD::SUB, VT)) { SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT); - return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext); + return DAG.getNegative(Zext, DL, VT); } // Eliminate this sign extend by doing a decrement in the destination type: // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1) @@ -12192,10 +12692,41 @@ static SDValue widenCtPop(SDNode *Extend, SelectionDAG &DAG) { return DAG.getNode(ISD::CTPOP, DL, VT, NewZext); } +// If we have (zext (abs X)) where X is a type that will be promoted by type +// legalization, convert to (abs (sext X)). But don't extend past a legal type. +static SDValue widenAbs(SDNode *Extend, SelectionDAG &DAG) { + assert(Extend->getOpcode() == ISD::ZERO_EXTEND && "Expected zero extend."); + + EVT VT = Extend->getValueType(0); + if (VT.isVector()) + return SDValue(); + + SDValue Abs = Extend->getOperand(0); + if (Abs.getOpcode() != ISD::ABS || !Abs.hasOneUse()) + return SDValue(); + + EVT AbsVT = Abs.getValueType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.getTypeAction(*DAG.getContext(), AbsVT) != + TargetLowering::TypePromoteInteger) + return SDValue(); + + EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), AbsVT); + + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Abs), LegalVT, Abs.getOperand(0)); + SDValue NewAbs = DAG.getNode(ISD::ABS, SDLoc(Abs), LegalVT, SExt); + return DAG.getZExtOrTrunc(NewAbs, SDLoc(Extend), VT); +} + SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N))) + return FoldedVOp; + // zext(undef) = 0 if (N0.isUndef()) return DAG.getConstant(0, SDLoc(N), VT); @@ -12452,6 +12983,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { if (SDValue NewCtPop = widenCtPop(N, DAG)) return NewCtPop; + if (SDValue V = widenAbs(N, DAG)) + return V; + if (SDValue Res = tryToFoldExtendSelectLoad(N, TLI, DAG)) return Res; @@ -12878,8 +13412,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) { auto AdjustBigEndianShift = [&](unsigned ShAmt) { unsigned LVTStoreBits = - LN0->getMemoryVT().getStoreSizeInBits().getFixedSize(); - unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedSize(); + LN0->getMemoryVT().getStoreSizeInBits().getFixedValue(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits().getFixedValue(); return LVTStoreBits - EVTStoreBits - ShAmt; }; @@ -13120,16 +13654,75 @@ SDValue 
DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } + // Fold (iM_signext_inreg + // (extract_subvector (zext|anyext|sext iN_v to _) _) + // from iN) + // -> (extract_subvector (signext iN_v to iM)) + if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && N0.hasOneUse() && + ISD::isExtOpcode(N0.getOperand(0).getOpcode())) { + SDValue InnerExt = N0.getOperand(0); + EVT InnerExtVT = InnerExt->getValueType(0); + SDValue Extendee = InnerExt->getOperand(0); + + if (ExtVTBits == Extendee.getValueType().getScalarSizeInBits() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND, InnerExtVT))) { + SDValue SignExtExtendee = + DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), InnerExtVT, Extendee); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, SignExtExtendee, + N0.getOperand(1)); + } + } + return SDValue(); } +static SDValue +foldExtendVectorInregToExtendOfSubvector(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG, + bool LegalOperations) { + unsigned InregOpcode = N->getOpcode(); + unsigned Opcode = DAG.getOpcode_EXTEND(InregOpcode); + + SDValue Src = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT SrcVT = EVT::getVectorVT(*DAG.getContext(), + Src.getValueType().getVectorElementType(), + VT.getVectorElementCount()); + + assert((InregOpcode == ISD::SIGN_EXTEND_VECTOR_INREG || + InregOpcode == ISD::ZERO_EXTEND_VECTOR_INREG || + InregOpcode == ISD::ANY_EXTEND_VECTOR_INREG) && + "Expected EXTEND_VECTOR_INREG dag node in input!"); + + // Profitability check: our operand must be an one-use CONCAT_VECTORS. + // FIXME: one-use check may be overly restrictive + if (!Src.hasOneUse() || Src.getOpcode() != ISD::CONCAT_VECTORS) + return SDValue(); + + // Profitability check: we must be extending exactly one of it's operands. + // FIXME: this is probably overly restrictive. + Src = Src.getOperand(0); + if (Src.getValueType() != SrcVT) + return SDValue(); + + if (LegalOperations && !TLI.isOperationLegal(Opcode, VT)) + return SDValue(); + + return DAG.getNode(Opcode, SDLoc(N), VT, Src); +} + SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); - // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same. - if (N0.isUndef()) - return DAG.getConstant(0, SDLoc(N), VT); + if (N0.isUndef()) { + // aext_vector_inreg(undef) = undef because the top bits are undefined. + // {s/z}ext_vector_inreg(undef) = 0 because the top bits must be the same. + return N->getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG + ? DAG.getUNDEF(VT) + : DAG.getConstant(0, SDLoc(N), VT); + } if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes)) return Res; @@ -13137,6 +13730,10 @@ SDValue DAGCombiner::visitEXTEND_VECTOR_INREG(SDNode *N) { if (SimplifyDemandedVectorElts(SDValue(N, 0))) return SDValue(N, 0); + if (SDValue R = foldExtendVectorInregToExtendOfSubvector(N, TLI, DAG, + LegalOperations)) + return R; + return SDValue(); } @@ -13394,18 +13991,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { if (SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); - // See if we can simplify the input to this truncate through knowledge that - // only the low bits are being used. - // For example "trunc (or (shl x, 8), y)" // -> trunc y - // Currently we only perform this optimization on scalars because vectors - // may have different active low bits. 
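The widenAbs combine introduced a little earlier rewrites (zext (abs X)) as an abs of the sign-extended value when X's type would be promoted anyway. A standalone check over all i8 inputs, including the INT8_MIN wrap case (illustrative only, assuming two's-complement narrowing):

#include <cassert>
#include <cstdint>

// Both sides of the rewrite agree for every i8 value, including -128, where
// the narrow abs wraps.
static uint32_t ZextAbsI8(int8_t X) {
  int8_t A = (int8_t)(X < 0 ? -(int)X : (int)X);  // ISD::ABS on i8 (wraps at -128)
  return (uint8_t)A;                              // zero-extend to i32
}
static uint32_t AbsSextI8(int8_t X) {
  int32_t S = X;                                  // sign-extend to the promoted type
  int32_t A = S < 0 ? -S : S;                     // ISD::ABS on i32
  return (uint32_t)A;                             // zext/trunc to the result type
}

int main() {
  for (int V = -128; V <= 127; ++V)
    assert(ZextAbsI8((int8_t)V) == AbsSextI8((int8_t)V));
  return 0;
}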
- if (!VT.isVector()) { - APInt Mask = - APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits()); - if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask)) - return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); - } - // fold (truncate (extract_subvector(ext x))) -> // (extract_subvector x) // TODO: This can be generalized to cover cases where the truncate and extract @@ -13510,7 +14095,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { LD1->getAddressSpace() != LD2->getAddressSpace()) return SDValue(); - bool LD1Fast = false; + unsigned LD1Fast = 0; EVT LD1VT = LD1->getValueType(0); unsigned LD1Bytes = LD1VT.getStoreSize(); if ((!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && @@ -13840,15 +14425,72 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) { if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, /*PoisonOnly*/ false)) return N0; - // Fold freeze(bitcast(x)) -> bitcast(freeze(x)). - // TODO: Replace with pushFreezeToPreventPoisonFromPropagating fold. - if (N0.getOpcode() == ISD::BITCAST) - return DAG.getBitcast(N->getValueType(0), - DAG.getNode(ISD::FREEZE, SDLoc(N0), - N0.getOperand(0).getValueType(), - N0.getOperand(0))); + // Fold freeze(op(x, ...)) -> op(freeze(x), ...). + // Try to push freeze through instructions that propagate but don't produce + // poison as far as possible. If an operand of freeze follows three + // conditions 1) one-use, 2) does not produce poison, and 3) has all but one + // guaranteed-non-poison operands (or is a BUILD_VECTOR or similar) then push + // the freeze through to the operands that are not guaranteed non-poison. + // NOTE: we will strip poison-generating flags, so ignore them here. + if (DAG.canCreateUndefOrPoison(N0, /*PoisonOnly*/ false, + /*ConsiderFlags*/ false) || + N0->getNumValues() != 1 || !N0->hasOneUse()) + return SDValue(); - return SDValue(); + bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR; + + SmallSetVector<SDValue, 8> MaybePoisonOperands; + for (SDValue Op : N0->ops()) { + if (DAG.isGuaranteedNotToBeUndefOrPoison(Op, /*PoisonOnly*/ false, + /*Depth*/ 1)) + continue; + bool HadMaybePoisonOperands = !MaybePoisonOperands.empty(); + bool IsNewMaybePoisonOperand = MaybePoisonOperands.insert(Op); + if (!HadMaybePoisonOperands) + continue; + if (IsNewMaybePoisonOperand && !AllowMultipleMaybePoisonOperands) { + // Multiple maybe-poison ops when not allowed - bail out. + return SDValue(); + } + } + // NOTE: the whole op may be not guaranteed to not be undef or poison because + // it could create undef or poison due to it's poison-generating flags. + // So not finding any maybe-poison operands is fine. + + for (SDValue MaybePoisonOperand : MaybePoisonOperands) { + // Don't replace every single UNDEF everywhere with frozen UNDEF, though. + if (MaybePoisonOperand.getOpcode() == ISD::UNDEF) + continue; + // First, freeze each offending operand. + SDValue FrozenMaybePoisonOperand = DAG.getFreeze(MaybePoisonOperand); + // Then, change all other uses of unfrozen operand to use frozen operand. + DAG.ReplaceAllUsesOfValueWith(MaybePoisonOperand, FrozenMaybePoisonOperand); + if (FrozenMaybePoisonOperand.getOpcode() == ISD::FREEZE && + FrozenMaybePoisonOperand.getOperand(0) == FrozenMaybePoisonOperand) { + // But, that also updated the use in the freeze we just created, thus + // creating a cycle in a DAG. Let's undo that by mutating the freeze. 
+ DAG.UpdateNodeOperands(FrozenMaybePoisonOperand.getNode(), + MaybePoisonOperand); + } + } + + // The whole node may have been updated, so the value we were holding + // may no longer be valid. Re-fetch the operand we're `freeze`ing. + N0 = N->getOperand(0); + + // Finally, recreate the node, it's operands were updated to use + // frozen operands, so we just need to use it's "original" operands. + SmallVector<SDValue> Ops(N0->op_begin(), N0->op_end()); + // Special-handle ISD::UNDEF, each single one of them can be it's own thing. + for (SDValue &Op : Ops) { + if (Op.getOpcode() == ISD::UNDEF) + Op = DAG.getFreeze(Op); + } + // NOTE: this strips poison generating flags. + SDValue R = DAG.getNode(N0.getOpcode(), SDLoc(N0), N0->getVTList(), Ops); + assert(DAG.isGuaranteedNotToBeUndefOrPoison(R, /*PoisonOnly*/ false) && + "Can't create node that may be undef/poison!"); + return R; } /// We know that BV is a build_vector node with Constant, ConstantFP or Undef @@ -14012,26 +14654,37 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) + // This also works with nested fma instructions: + // fadd (fma A, B, (fma (C, D, (fmul (E, F))))), G --> + // fma A, B, (fma C, D, fma (E, F, G)) + // fadd (G, (fma A, B, (fma (C, D, (fmul (E, F)))))) --> + // fma A, B, (fma C, D, fma (E, F, G)). // This requires reassociation because it changes the order of operations. - SDValue FMA, E; - if (CanReassociate && isFusedOp(N0) && - N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && - N0.getOperand(2).hasOneUse()) { - FMA = N0; - E = N1; - } else if (CanReassociate && isFusedOp(N1) && - N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && - N1.getOperand(2).hasOneUse()) { - FMA = N1; - E = N0; - } - if (FMA && E) { - SDValue A = FMA.getOperand(0); - SDValue B = FMA.getOperand(1); - SDValue C = FMA.getOperand(2).getOperand(0); - SDValue D = FMA.getOperand(2).getOperand(1); - SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E); - return DAG.getNode(PreferredFusedOpcode, SL, VT, A, B, CDE); + if (CanReassociate) { + SDValue FMA, E; + if (isFusedOp(N0) && N0.hasOneUse()) { + FMA = N0; + E = N1; + } else if (isFusedOp(N1) && N1.hasOneUse()) { + FMA = N1; + E = N0; + } + + SDValue TmpFMA = FMA; + while (E && isFusedOp(TmpFMA) && TmpFMA.hasOneUse()) { + SDValue FMul = TmpFMA->getOperand(2); + if (FMul.getOpcode() == ISD::FMUL && FMul.hasOneUse()) { + SDValue C = FMul.getOperand(0); + SDValue D = FMul.getOperand(1); + SDValue CDE = DAG.getNode(PreferredFusedOpcode, SL, VT, C, D, E); + DAG.ReplaceAllUsesOfValueWith(FMul, CDE); + // Replacing the inner FMul could cause the outer FMA to be simplified + // away. + return FMA.getOpcode() == ISD::DELETED_NODE ? SDValue() : FMA; + } + + TmpFMA = TmpFMA->getOperand(2); + } } // Look through FP_EXTEND nodes to do more combining. 
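The reworked FMA combine now walks nested fma chains, turning fadd(fma(A, B, fma(C, D, fmul(E, F))), G) into fma(A, B, fma(C, D, fma(E, F, G))). The rewrite only fires under reassociation flags; a standalone sketch with exactly representable operands where both forms agree (illustrative only):

#include <cassert>
#include <cmath>

// With small integer-valued operands every step is exact, so the pre- and
// post-combine expressions compare equal.
int main() {
  const double A = 3, B = 4, C = 5, D = 6, E = 7, F = 8, G = 9;
  double Before = std::fma(A, B, std::fma(C, D, E * F)) + G;
  double After  = std::fma(A, B, std::fma(C, D, std::fma(E, F, G)));
  assert(Before == After && Before == 107.0);    // 12 + 30 + 56 + 9
  return 0;
}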
@@ -14331,8 +14984,8 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { return Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); }; - auto isContractableAndReassociableFMUL = [isContractableFMUL, - isReassociable](SDValue N) { + auto isContractableAndReassociableFMUL = [&isContractableFMUL, + &isReassociable](SDValue N) { return isContractableFMUL(N) && isReassociable(N.getNode()); }; @@ -14567,8 +15220,8 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { SDValue DAGCombiner::visitFADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - bool N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); - bool N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); + SDNode *N0CFP = DAG.isConstantFPBuildVectorOrConstantFP(N0); + SDNode *N1CFP = DAG.isConstantFPBuildVectorOrConstantFP(N1); EVT VT = N->getValueType(0); SDLoc DL(N); const TargetOptions &Options = DAG.getTarget().Options; @@ -14665,8 +15318,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // of rounding steps. if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { if (N0.getOpcode() == ISD::FMUL) { - bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); - bool CFP01 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); + SDNode *CFP00 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + SDNode *CFP01 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { @@ -14686,8 +15341,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N1.getOpcode() == ISD::FMUL) { - bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); - bool CFP11 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); + SDNode *CFP10 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + SDNode *CFP11 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { @@ -14707,7 +15364,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N0.getOpcode() == ISD::FADD) { - bool CFP00 = DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + SDNode *CFP00 = + DAG.isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && (N0.getOperand(0) == N1)) { @@ -14717,7 +15375,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } if (N1.getOpcode() == ISD::FADD) { - bool CFP10 = DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + SDNode *CFP10 = + DAG.isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && N1.getOperand(0) == N0) { @@ -14930,12 +15589,14 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); - if (NegN0 && NegN1 && - (CostN0 == TargetLowering::NegatibleCost::Cheaper || - CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1); + if (NegN0) { + HandleSDNode NegN0Handle(NegN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == 
TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1); + } // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X)) // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X) @@ -14964,7 +15625,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { case ISD::SETLT: case ISD::SETLE: std::swap(TrueOpnd, FalseOpnd); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOGT: case ISD::SETUGT: case ISD::SETOGE: @@ -15021,12 +15682,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); - if (NegN0 && NegN1 && - (CostN0 == TargetLowering::NegatibleCost::Cheaper || - CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); + if (NegN0) { + HandleSDNode NegN0Handle(NegN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2); + } // FIXME: use fast math flags instead of Options.UnsafeFPMath if (Options.UnsafeFPMath) { @@ -15324,12 +15987,14 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { TargetLowering::NegatibleCost::Expensive; SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize, CostN0); - SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); - if (NegN0 && NegN1 && - (CostN0 == TargetLowering::NegatibleCost::Cheaper || - CostN1 == TargetLowering::NegatibleCost::Cheaper)) - return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1); + if (NegN0) { + HandleSDNode NegN0Handle(NegN0); + SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize, CostN1); + if (NegN1 && (CostN0 == TargetLowering::NegatibleCost::Cheaper || + CostN1 == TargetLowering::NegatibleCost::Cheaper)) + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, NegN0, NegN1); + } return SDValue(); } @@ -15396,11 +16061,7 @@ static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { if (N1Op0VT == MVT::f128) return false; - // Avoid mismatched vector operand types, for better instruction selection. - if (N1Op0VT.isVector()) - return false; - - return true; + return !N1Op0VT.isVector() || EnableVectorFCopySignExtendRound; } return false; } @@ -15722,12 +16383,12 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); EVT VT = N->getValueType(0); // fold (fp_round c1fp) -> c1fp - if (N0CFP) - return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); + if (SDValue C = + DAG.FoldConstantArithmetic(ISD::FP_ROUND, SDLoc(N), VT, {N0, N1})) + return C; // fold (fp_round (fp_extend x)) -> x if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) @@ -15755,8 +16416,9 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // Also, this is a value preserving truncation iff both fp_round's are. 
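Two of the visitFADD patterns touched above fold an add of a value into its own multiply: (fadd (fmul x, c), x) becomes (fmul x, c+1) and (fadd (fadd x, x), x) becomes (fmul x, 3.0), again only under fast-math/reassociation. A standalone sketch with exactly representable values (illustrative only):

#include <cassert>

// Exactly representable values, so the rewritten and original expressions
// are bit-identical.
int main() {
  const double X = 1.5, C = 4.0;
  assert(X * C + X == X * (C + 1.0));   // (fadd (fmul x, c), x) -> (fmul x, c+1)
  assert((X + X) + X == X * 3.0);       // (fadd (fadd x, x), x) -> (fmul x, 3.0)
  return 0;
}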
if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { SDLoc DL(N); - return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), - DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + return DAG.getNode( + ISD::FP_ROUND, DL, VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL, /*isTarget=*/true)); } } @@ -15779,6 +16441,10 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVCastOp(N, SDLoc(N))) + return FoldedVOp; + // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::FP_ROUND) @@ -15814,11 +16480,11 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { LN0->getBasePtr(), N0.getValueType(), LN0->getMemOperand()); CombineTo(N, ExtLoad); - CombineTo(N0.getNode(), - DAG.getNode(ISD::FP_ROUND, SDLoc(N0), - N0.getValueType(), ExtLoad, - DAG.getIntPtrConstant(1, SDLoc(N0))), - ExtLoad.getValue(1)); + CombineTo( + N0.getNode(), + DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad, + DAG.getIntPtrConstant(1, SDLoc(N0), /*isTarget=*/true)), + ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -16573,7 +17239,6 @@ static inline ElementCount numVectorEltsOrZero(EVT T) { } bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) { - Val = ST->getValue(); EVT STType = Val.getValueType(); EVT STMemType = ST->getMemoryVT(); if (STType == STMemType) @@ -16629,7 +17294,7 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { SDValue Chain = LD->getOperand(0); StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode()); // TODO: Relax this restriction for unordered atomics (see D66309) - if (!ST || !ST->isSimple()) + if (!ST || !ST->isSimple() || ST->getAddressSpace() != LD->getAddressSpace()) return SDValue(); EVT LDType = LD->getValueType(0); @@ -16665,9 +17330,10 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // significant bit in the loaded value maps to the least significant bit in // the stored value). With Offset=n (for n > 0) the loaded value starts at the // n:th least significant byte of the stored value. 
+ int64_t OrigOffset = Offset; if (DAG.getDataLayout().isBigEndian()) - Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedSize() - - (int64_t)LDMemType.getStoreSizeInBits().getFixedSize()) / + Offset = ((int64_t)STMemType.getStoreSizeInBits().getFixedValue() - + (int64_t)LDMemType.getStoreSizeInBits().getFixedValue()) / 8 - Offset; @@ -16679,8 +17345,8 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { if (LdStScalable) STCoversLD = (Offset == 0) && LdMemSize == StMemSize; else - STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedSize() <= - StMemSize.getFixedSize()); + STCoversLD = (Offset >= 0) && (Offset * 8 + LdMemSize.getFixedValue() <= + StMemSize.getFixedValue()); auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue { if (LD->isIndexed()) { @@ -16709,18 +17375,30 @@ SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) { // Mask to size of LDMemType auto Mask = DAG.getConstant(APInt::getLowBitsSet(STType.getFixedSizeInBits(), - StMemSize.getFixedSize()), + StMemSize.getFixedValue()), SDLoc(ST), STType); auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask); return ReplaceLd(LD, Val, Chain); } } + // Handle some cases for big-endian that would be Offset 0 and handled for + // little-endian. + SDValue Val = ST->getValue(); + if (DAG.getDataLayout().isBigEndian() && Offset > 0 && OrigOffset == 0) { + if (STType.isInteger() && !STType.isVector() && LDType.isInteger() && + !LDType.isVector() && isTypeLegal(STType) && + TLI.isOperationLegal(ISD::SRL, STType)) { + Val = DAG.getNode(ISD::SRL, SDLoc(LD), STType, Val, + DAG.getConstant(Offset * 8, SDLoc(LD), STType)); + Offset = 0; + } + } + // TODO: Deal with nonzero offset. if (LD->getBasePtr().isUndef() || Offset != 0) return SDValue(); // Model necessary truncations / extenstions. - SDValue Val; // Truncate Value To Stored Memory Size. do { if (!getTruncatedStoreValue(ST, Val)) @@ -17160,7 +17838,7 @@ struct LoadedSlice { // Check if it will be merged with the load. // 1. Check the alignment / fast memory access constraint. - bool IsFast = false; + unsigned IsFast = 0; if (!TLI.allowsMemoryAccess(*DAG->getContext(), DAG->getDataLayout(), ResVT, Origin->getAddressSpace(), getAlign(), Origin->getMemOperand()->getFlags(), &IsFast) || @@ -17663,7 +18341,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { if (DAG.getDataLayout().isBigEndian()) PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; - bool IsFast = false; + unsigned IsFast = 0; Align NewAlign = commonAlignment(LD->getAlign(), PtrOff); if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), NewVT, LD->getAddressSpace(), NewAlign, @@ -17722,8 +18400,8 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { if (VTSize.isScalable()) return SDValue(); - bool FastLD = false, FastST = false; - EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedSize()); + unsigned FastLD = 0, FastST = 0; + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VTSize.getFixedValue()); if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || !TLI.isOperationLegal(ISD::STORE, IntVT) || !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || @@ -17866,7 +18544,7 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( unsigned SizeInBits = NumStores * ElementSizeBits; unsigned NumMemElts = MemVT.isVector() ? 
MemVT.getVectorNumElements() : 1; - Optional<MachineMemOperand::Flags> Flags; + std::optional<MachineMemOperand::Flags> Flags; AAMDNodes AAInfo; for (unsigned I = 0; I != NumStores; ++I) { StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode); @@ -17941,6 +18619,9 @@ bool DAGCombiner::mergeStoresOfConstantsOrVecElts( // We may need to add a bitcast here to get types to line up. if (MemVTScalarTy != Val.getValueType().getScalarType()) { Val = DAG.getBitcast(MemVT, Val); + } else if (MemVT.isVector() && + Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, MemVT, Val); } else { unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT; @@ -18331,7 +19012,7 @@ bool DAGCombiner::tryStoreMergeOfConstants( // Find a legal type for the constant store. unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8; EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); - bool IsFast = false; + unsigned IsFast = 0; // Break early when size is too large to be legal. if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) @@ -18441,7 +19122,7 @@ bool DAGCombiner::tryStoreMergeOfExtracts( // Find a legal type for the vector store. unsigned Elts = (i + 1) * NumMemElts; EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); - bool IsFast = false; + unsigned IsFast = 0; // Break early when size is too large to be legal. if (Ty.getSizeInBits() > MaximumLegalStoreInBits) @@ -18594,8 +19275,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl<MemOpLink> &StoreNodes, if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits) break; - bool IsFastSt = false; - bool IsFastLd = false; + unsigned IsFastSt = 0; + unsigned IsFastLd = 0; // Don't try vector types if we need a rotate. We may still fail the // legality checks for the integer type, but we can't handle the rotate // case with vectors. @@ -19050,16 +19731,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { APInt::getLowBitsSet(Value.getScalarValueSizeInBits(), ST->getMemoryVT().getScalarSizeInBits()); - // See if we can simplify the input to this truncstore with knowledge that - // only the low bits are being used. For example: - // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + // See if we can simplify the operation with SimplifyDemandedBits, which + // only works if the value has a single use. AddToWorklist(Value.getNode()); - if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits)) - return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), - ST->getMemOperand()); - - // Otherwise, see if we can simplify the operation with - // SimplifyDemandedBits, which only works if the value has a single use. if (SimplifyDemandedBits(Value, TruncDemandedBits)) { // Re-visit the store if anything changed and the store hasn't been merged // with another node (N is deleted) SimplifyDemandedBits will add Value's @@ -19069,6 +19743,28 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { AddToWorklist(N); return SDValue(N, 0); } + + // Otherwise, see if we can simplify the input to this truncstore with + // knowledge that only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + if (SDValue Shorter = + TLI.SimplifyMultipleUseDemandedBits(Value, TruncDemandedBits, DAG)) + return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(), + ST->getMemOperand()); + + // If we're storing a truncated constant, see if we can simplify it. + // TODO: Move this to targetShrinkDemandedConstant? 
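The reordered truncating-store logic above still relies on the same demanded-bits fact: only the stored low bits matter, so an i8 truncstore of ((x << 8) | y) can store y instead. A standalone check of that identity (illustrative only):

#include <cassert>
#include <cstdint>

// An i8 truncating store only keeps the low byte, and the low byte of
// ((x << 8) | y) is the low byte of y.
int main() {
  for (uint32_t x = 0; x < 256; ++x)
    for (uint32_t y = 0; y < 1024; ++y)
      assert((uint8_t)((x << 8) | y) == (uint8_t)y);
  return 0;
}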
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Value)) + if (!Cst->isOpaque()) { + const APInt &CValue = Cst->getAPIntValue(); + APInt NewVal = CValue & TruncDemandedBits; + if (NewVal != CValue) { + SDValue Shorter = + DAG.getConstant(NewVal, SDLoc(N), Value.getValueType()); + return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); + } + } } // If this is a load followed by a store to the same location, then the store @@ -19209,7 +19905,7 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) { // If we store purely within object bounds just before its lifetime ends, // we can remove the store. if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase, - StoreSize.getFixedSize() * 8)) { + StoreSize.getFixedValue() * 8)) { LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump(); dbgs() << "\nwithin LIFETIME_END of : "; LifetimeEndBase.dump(); dbgs() << "\n"); @@ -19329,94 +20025,113 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) { return St1; } -/// Convert a disguised subvector insertion into a shuffle: -SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { - assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && - "Expected extract_vector_elt"); - SDValue InsertVal = N->getOperand(1); - SDValue Vec = N->getOperand(0); +// Merge an insertion into an existing shuffle: +// (insert_vector_elt (vector_shuffle X, Y, Mask), +// .(extract_vector_elt X, N), InsIndex) +// --> (vector_shuffle X, Y, NewMask) +// and variations where shuffle operands may be CONCAT_VECTORS. +static bool mergeEltWithShuffle(SDValue &X, SDValue &Y, ArrayRef<int> Mask, + SmallVectorImpl<int> &NewMask, SDValue Elt, + unsigned InsIndex) { + if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Elt.getOperand(1))) + return false; - // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), - // InsIndex) - // --> (vector_shuffle X, Y) and variations where shuffle operands may be - // CONCAT_VECTORS. - if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() && - InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - isa<ConstantSDNode>(InsertVal.getOperand(1))) { - ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode()); - ArrayRef<int> Mask = SVN->getMask(); + // Vec's operand 0 is using indices from 0 to N-1 and + // operand 1 from N to 2N - 1, where N is the number of + // elements in the vectors. + SDValue InsertVal0 = Elt.getOperand(0); + int ElementOffset = -1; + + // We explore the inputs of the shuffle in order to see if we find the + // source of the extract_vector_elt. If so, we can use it to modify the + // shuffle rather than perform an insert_vector_elt. + SmallVector<std::pair<int, SDValue>, 8> ArgWorkList; + ArgWorkList.emplace_back(Mask.size(), Y); + ArgWorkList.emplace_back(0, X); + + while (!ArgWorkList.empty()) { + int ArgOffset; + SDValue ArgVal; + std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); + + if (ArgVal == InsertVal0) { + ElementOffset = ArgOffset; + break; + } - SDValue X = Vec.getOperand(0); - SDValue Y = Vec.getOperand(1); - - // Vec's operand 0 is using indices from 0 to N-1 and - // operand 1 from N to 2N - 1, where N is the number of - // elements in the vectors. - SDValue InsertVal0 = InsertVal.getOperand(0); - int ElementOffset = -1; - - // We explore the inputs of the shuffle in order to see if we find the - // source of the extract_vector_elt. If so, we can use it to modify the - // shuffle rather than perform an insert_vector_elt. 
- SmallVector<std::pair<int, SDValue>, 8> ArgWorkList; - ArgWorkList.emplace_back(Mask.size(), Y); - ArgWorkList.emplace_back(0, X); - - while (!ArgWorkList.empty()) { - int ArgOffset; - SDValue ArgVal; - std::tie(ArgOffset, ArgVal) = ArgWorkList.pop_back_val(); - - if (ArgVal == InsertVal0) { - ElementOffset = ArgOffset; - break; + // Peek through concat_vector. + if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { + int CurrentArgOffset = + ArgOffset + ArgVal.getValueType().getVectorNumElements(); + int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); + for (SDValue Op : reverse(ArgVal->ops())) { + CurrentArgOffset -= Step; + ArgWorkList.emplace_back(CurrentArgOffset, Op); } - // Peek through concat_vector. - if (ArgVal.getOpcode() == ISD::CONCAT_VECTORS) { - int CurrentArgOffset = - ArgOffset + ArgVal.getValueType().getVectorNumElements(); - int Step = ArgVal.getOperand(0).getValueType().getVectorNumElements(); - for (SDValue Op : reverse(ArgVal->ops())) { - CurrentArgOffset -= Step; - ArgWorkList.emplace_back(CurrentArgOffset, Op); - } - - // Make sure we went through all the elements and did not screw up index - // computation. - assert(CurrentArgOffset == ArgOffset); - } + // Make sure we went through all the elements and did not screw up index + // computation. + assert(CurrentArgOffset == ArgOffset); } + } - // If we failed to find a match, see if we can replace an UNDEF shuffle - // operand. - if (ElementOffset == -1 && Y.isUndef() && - InsertVal0.getValueType() == Y.getValueType()) { - ElementOffset = Mask.size(); - Y = InsertVal0; - } + // If we failed to find a match, see if we can replace an UNDEF shuffle + // operand. + if (ElementOffset == -1) { + if (!Y.isUndef() || InsertVal0.getValueType() != Y.getValueType()) + return false; + ElementOffset = Mask.size(); + Y = InsertVal0; + } - if (ElementOffset != -1) { - SmallVector<int, 16> NewMask(Mask.begin(), Mask.end()); + NewMask.assign(Mask.begin(), Mask.end()); + NewMask[InsIndex] = ElementOffset + Elt.getConstantOperandVal(1); + assert(NewMask[InsIndex] < (int)(2 * Mask.size()) && NewMask[InsIndex] >= 0 && + "NewMask[InsIndex] is out of bound"); + return true; +} - auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1)); - NewMask[InsIndex] = ElementOffset + ExtrIndex->getZExtValue(); - assert(NewMask[InsIndex] < - (int)(2 * Vec.getValueType().getVectorNumElements()) && - NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound"); +// Merge an insertion into an existing shuffle: +// (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), +// InsIndex) +// --> (vector_shuffle X, Y) and variations where shuffle operands may be +// CONCAT_VECTORS. 
+SDValue DAGCombiner::mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex) { + assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && + "Expected extract_vector_elt"); + SDValue InsertVal = N->getOperand(1); + SDValue Vec = N->getOperand(0); - SDValue LegalShuffle = - TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X, - Y, NewMask, DAG); - if (LegalShuffle) - return LegalShuffle; - } + auto *SVN = dyn_cast<ShuffleVectorSDNode>(Vec); + if (!SVN || !Vec.hasOneUse()) + return SDValue(); + + ArrayRef<int> Mask = SVN->getMask(); + SDValue X = Vec.getOperand(0); + SDValue Y = Vec.getOperand(1); + + SmallVector<int, 16> NewMask(Mask); + if (mergeEltWithShuffle(X, Y, Mask, NewMask, InsertVal, InsIndex)) { + SDValue LegalShuffle = TLI.buildLegalVectorShuffle( + Vec.getValueType(), SDLoc(N), X, Y, NewMask, DAG); + if (LegalShuffle) + return LegalShuffle; } - // insert_vector_elt V, (bitcast X from vector type), IdxC --> - // bitcast(shuffle (bitcast V), (extended X), Mask) - // Note: We do not use an insert_subvector node because that requires a - // legal subvector type. + return SDValue(); +} + +// Convert a disguised subvector insertion into a shuffle: +// insert_vector_elt V, (bitcast X from vector type), IdxC --> +// bitcast(shuffle (bitcast V), (extended X), Mask) +// Note: We do not use an insert_subvector node because that requires a +// legal subvector type. +SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) { + assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && + "Expected extract_vector_elt"); + SDValue InsertVal = N->getOperand(1); + if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() || !InsertVal.getOperand(0).getValueType().isVector()) return SDValue(); @@ -19491,13 +20206,8 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { if (!IndexC) { // If this is variable insert to undef vector, it might be better to splat: // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... > - if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) { - if (VT.isScalableVector()) - return DAG.getSplatVector(VT, DL, InVal); - - SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), InVal); - return DAG.getBuildVector(VT, DL, Ops); - } + if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) + return DAG.getSplat(VT, DL, InVal); return SDValue(); } @@ -19509,9 +20219,6 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { // We must know which element is being inserted for folds below here. unsigned Elt = IndexC->getZExtValue(); - if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) - return Shuf; - // Handle <1 x ???> vector insertion special cases. if (NumElts == 1) { // insert_vector_elt(x, extract_vector_elt(y, 0), 0) -> y @@ -19541,6 +20248,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { } } + if (SDValue Shuf = mergeInsertEltWithShuffle(N, Elt)) + return Shuf; + + if (SDValue Shuf = combineInsertEltToShuffle(N, Elt)) + return Shuf; + // Attempt to convert an insert_vector_elt chain into a legal build_vector. if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { // vXi1 vector - we don't need to recurse. @@ -19610,9 +20323,52 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { continue; } + // VECTOR_SHUFFLE - if all the operands match the shuffle's sources, + // update the shuffle mask (and second operand if we started with unary + // shuffle) and create a new legal shuffle. 
+ if (CurVec.getOpcode() == ISD::VECTOR_SHUFFLE && CurVec.hasOneUse()) { + auto *SVN = cast<ShuffleVectorSDNode>(CurVec); + SDValue LHS = SVN->getOperand(0); + SDValue RHS = SVN->getOperand(1); + SmallVector<int, 16> Mask(SVN->getMask()); + bool Merged = true; + for (auto I : enumerate(Ops)) { + SDValue &Op = I.value(); + if (Op) { + SmallVector<int, 16> NewMask; + if (!mergeEltWithShuffle(LHS, RHS, Mask, NewMask, Op, I.index())) { + Merged = false; + break; + } + Mask = std::move(NewMask); + } + } + if (Merged) + if (SDValue NewShuffle = + TLI.buildLegalVectorShuffle(VT, DL, LHS, RHS, Mask, DAG)) + return NewShuffle; + } + // Failed to find a match in the chain - bail. break; } + + // See if we can fill in the missing constant elements as zeros. + // TODO: Should we do this for any constant? + APInt DemandedZeroElts = APInt::getZero(NumElts); + for (unsigned I = 0; I != NumElts; ++I) + if (!Ops[I]) + DemandedZeroElts.setBit(I); + + if (DAG.MaskedVectorIsZero(InVec, DemandedZeroElts)) { + SDValue Zero = VT.isInteger() ? DAG.getConstant(0, DL, MaxEltVT) + : DAG.getConstantFP(0, DL, MaxEltVT); + for (unsigned I = 0; I != NumElts; ++I) + if (!Ops[I]) + Ops[I] = Zero; + + return CanonicalizeBuildVector(Ops); + } } return SDValue(); @@ -19653,7 +20409,7 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, Alignment = commonAlignment(Alignment, VecEltVT.getSizeInBits() / 8); } - bool IsFast = false; + unsigned IsFast = 0; if (!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VecEltVT, OriginalLoad->getAddressSpace(), Alignment, OriginalLoad->getMemOperand()->getFlags(), @@ -19731,6 +20487,168 @@ static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, return SDValue(); } +// Given an ISD::EXTRACT_VECTOR_ELT, which is a glorified bit sequence extract, +// recursively analyse all of its users and try to model them as +// bit sequence extractions. If all of them agree on the new, narrower element +// type, and all of them can be modelled as ISD::EXTRACT_VECTOR_ELT's of that +// new element type, do so now. +// This is mainly useful to recover from legalization that scalarized +// the vector as wide elements, but tries to rebuild it with narrower elements. +// +// Some more nodes could be modelled if that helps cover interesting patterns. +bool DAGCombiner::refineExtractVectorEltIntoMultipleNarrowExtractVectorElts( + SDNode *N) { + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may cause legalization cycles. + if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) + return false; + + // TODO: Add support for big-endian. + if (DAG.getDataLayout().isBigEndian()) + return false; + + SDValue VecOp = N->getOperand(0); + EVT VecVT = VecOp.getValueType(); + assert(!VecVT.isScalableVector() && "Only for fixed vectors."); + + // We must start with a constant extraction index. + auto *IndexC = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!IndexC) + return false; + + assert(IndexC->getZExtValue() < VecVT.getVectorNumElements() && + "Original ISD::EXTRACT_VECTOR_ELT is undefined?"); + + // TODO: deal with the case of implicit anyext of the extraction. + unsigned VecEltBitWidth = VecVT.getScalarSizeInBits(); + EVT ScalarVT = N->getValueType(0); + if (VecVT.getScalarType() != ScalarVT) + return false; + + // TODO: deal with the cases other than everything being integer-typed.
+ if (!ScalarVT.isScalarInteger()) + return false; + + struct Entry { + SDNode *Producer; + + // Which bits of VecOp does it contain? + unsigned BitPos; + int NumBits; + // NOTE: the actual width of \p Producer may be wider than NumBits! + + Entry(Entry &&) = default; + Entry(SDNode *Producer_, unsigned BitPos_, int NumBits_) + : Producer(Producer_), BitPos(BitPos_), NumBits(NumBits_) {} + + Entry() = delete; + Entry(const Entry &) = delete; + Entry &operator=(const Entry &) = delete; + Entry &operator=(Entry &&) = delete; + }; + SmallVector<Entry, 32> Worklist; + SmallVector<Entry, 32> Leafs; + + // We start at the "root" ISD::EXTRACT_VECTOR_ELT. + Worklist.emplace_back(N, /*BitPos=*/VecEltBitWidth * IndexC->getZExtValue(), + /*NumBits=*/VecEltBitWidth); + + while (!Worklist.empty()) { + Entry E = Worklist.pop_back_val(); + // Does the node not even use any of the VecOp bits? + if (!(E.NumBits > 0 && E.BitPos < VecVT.getSizeInBits() && + E.BitPos + E.NumBits <= VecVT.getSizeInBits())) + return false; // Let's allow the other combines to clean this up first. + // Did we fail to model any of the users of the Producer? + bool ProducerIsLeaf = false; + // Look at each user of this Producer. + for (SDNode *User : E.Producer->uses()) { + switch (User->getOpcode()) { + // TODO: support ISD::BITCAST + // TODO: support ISD::ANY_EXTEND + // TODO: support ISD::ZERO_EXTEND + // TODO: support ISD::SIGN_EXTEND + case ISD::TRUNCATE: + // Truncation simply means we keep position, but extract fewer bits. + Worklist.emplace_back(User, E.BitPos, + /*NumBits=*/User->getValueSizeInBits(0)); + break; + // TODO: support ISD::SRA + // TODO: support ISD::SHL + case ISD::SRL: + // We should be shifting the Producer by a constant amount. + if (auto *ShAmtC = dyn_cast<ConstantSDNode>(User->getOperand(1)); + User->getOperand(0).getNode() == E.Producer && ShAmtC) { + // Logical right-shift means that we start extraction later, + // but stop it at the same position we did previously. + unsigned ShAmt = ShAmtC->getZExtValue(); + Worklist.emplace_back(User, E.BitPos + ShAmt, E.NumBits - ShAmt); + break; + } + [[fallthrough]]; + default: + // We cannot model this user of the Producer. + // Which means the current Producer will be an ISD::EXTRACT_VECTOR_ELT. + ProducerIsLeaf = true; + // Profitability check: all users that we cannot model + // must be ISD::BUILD_VECTOR's. + if (User->getOpcode() != ISD::BUILD_VECTOR) + return false; + break; + } + } + if (ProducerIsLeaf) + Leafs.emplace_back(std::move(E)); + } + + unsigned NewVecEltBitWidth = Leafs.front().NumBits; + + // If we are still at the same element granularity, give up. + if (NewVecEltBitWidth == VecEltBitWidth) + return false; + + // The vector width must be a multiple of the new element width. + if (VecVT.getSizeInBits() % NewVecEltBitWidth != 0) + return false; + + // All leafs must agree on the new element width. + // All leafs must not expect any "padding" bits on top of that width. + // All leafs must start extraction from a multiple of that width.
+ if (!all_of(Leafs, [NewVecEltBitWidth](const Entry &E) { + return (unsigned)E.NumBits == NewVecEltBitWidth && + E.Producer->getValueSizeInBits(0) == NewVecEltBitWidth && + E.BitPos % NewVecEltBitWidth == 0; + })) + return false; + + EVT NewScalarVT = EVT::getIntegerVT(*DAG.getContext(), NewVecEltBitWidth); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewScalarVT, + VecVT.getSizeInBits() / NewVecEltBitWidth); + + if (LegalTypes && + !(TLI.isTypeLegal(NewScalarVT) && TLI.isTypeLegal(NewVecVT))) + return false; + + if (LegalOperations && + !(TLI.isOperationLegalOrCustom(ISD::BITCAST, NewVecVT) && + TLI.isOperationLegalOrCustom(ISD::EXTRACT_VECTOR_ELT, NewVecVT))) + return false; + + SDValue NewVecOp = DAG.getBitcast(NewVecVT, VecOp); + for (const Entry &E : Leafs) { + SDLoc DL(E.Producer); + unsigned NewIndex = E.BitPos / NewVecEltBitWidth; + assert(NewIndex < NewVecVT.getVectorNumElements() && + "Creating out-of-bounds ISD::EXTRACT_VECTOR_ELT?"); + SDValue V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, NewScalarVT, NewVecOp, + DAG.getVectorIdxConstant(NewIndex, DL)); + CombineTo(E.Producer, V); + } + + return true; +} + SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue VecOp = N->getOperand(0); SDValue Index = N->getOperand(1); @@ -19774,6 +20692,12 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { IndexC->getAPIntValue().uge(VecVT.getVectorNumElements())) return DAG.getUNDEF(ScalarVT); + // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx + if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) { + return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, + VecOp.getOperand(0), Index)); + } + // extract_vector_elt (build_vector x, y), 1 -> y if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || VecOp.getOpcode() == ISD::SPLAT_VECTOR) && @@ -19819,7 +20743,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { unsigned BCTruncElt = IsLE ? 0 : NumElts - 1; SDValue BCSrc = VecOp.getOperand(0); if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger()) - return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc); + return DAG.getAnyExtOrTrunc(BCSrc, DL, ScalarVT); if (LegalTypes && BCSrc.getValueType().isInteger() && BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) { @@ -19919,6 +20843,9 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + if (refineExtractVectorEltIntoMultipleNarrowExtractVectorElts(N)) + return SDValue(N, 0); + // Everything under here is trying to match an extract of a loaded value. // If the result of load has to be truncated, then it's not necessarily // profitable. 
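Illustrative aside, not part of the patch: the narrowing combine above leans on a little-endian layout fact -- extracting a wide element and then shifting/truncating pieces out of it reads the same bits as extracting narrower elements directly. A minimal standalone C++ sketch of that equivalence follows; the element values and widths are made-up examples, nothing here is taken from the LLVM sources, and it assumes a little-endian host (the only case the combine handles).

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // A v4i32 vector and the same 16 bytes reinterpreted as v2i64.
  uint32_t NarrowElts[4] = {0x11111111u, 0x22222222u, 0x33333333u, 0x44444444u};
  uint64_t WideElts[2];
  std::memcpy(WideElts, NarrowElts, sizeof(NarrowElts));

  // "extract_vector_elt v2i64:1" followed by its users (srl 32; trunc to i32)...
  uint32_t ViaShiftTrunc = static_cast<uint32_t>(WideElts[1] >> 32);

  // ...reads the same bits as the narrow extract at index
  // BitPos / NewVecEltBitWidth == (64 * 1 + 32) / 32 == 3 on little-endian.
  uint32_t ViaNarrowExtract = NarrowElts[3];

  assert(ViaShiftTrunc == ViaNarrowExtract);
  return 0;
}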
@@ -20160,7 +21087,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { // Simplify (build_vec (trunc $1) // (trunc (srl $1 half-width)) -// (trunc (srl $1 (2 * half-width))) …) +// (trunc (srl $1 (2 * half-width)))) // to (bitcast $1) SDValue DAGCombiner::reduceBuildVecTruncToBitCast(SDNode *N) { assert(N->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector"); @@ -20313,6 +21240,29 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N, SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2)); ConcatOps[0] = VecIn2; VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps); + } else if (InVT1Size / VTSize > 1 && InVT1Size % VTSize == 0) { + if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems) || + !TLI.isTypeLegal(InVT1) || !TLI.isTypeLegal(InVT2)) + return SDValue(); + // If the dest vector has fewer than two elements, then a shuffle and extract + // from larger regs will cost even more. + if (VT.getVectorNumElements() <= 2 || !VecIn2.getNode()) + return SDValue(); + assert(InVT2Size <= InVT1Size && + "Second input is not going to be larger than the first one."); + + // VecIn1 is wider than the output, and we have another, possibly + // smaller input. Pad the smaller input with undefs, shuffle at the + // input vector width, and extract the output. + // The shuffle type is different from VT, so check legality again. + if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1)) + return SDValue(); + + if (InVT1 != InVT2) { + VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1, + DAG.getUNDEF(InVT1), VecIn2, ZeroIdx); + } + ShuffleNumElems = InVT1Size / VTSize * NumElems; } else { // TODO: Support cases where the length mismatch isn't exactly by a // factor of 2. @@ -20753,6 +21703,127 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) { VT, In); } +// If this is a very simple BUILD_VECTOR with the first element being a ZERO_EXTEND, +// and all other elements being constant zeros, granularize the BUILD_VECTOR's +// element width, absorbing the ZERO_EXTEND, turning it into a constant zero op. +// This pattern can appear during legalization. +// +// NOTE: This can be generalized to allow more than a single +// non-constant-zero op, UNDEF's, and to be KnownBits-based. +SDValue DAGCombiner::convertBuildVecZextToBuildVecWithZeros(SDNode *N) { + // Don't run this after legalization. Targets may have other preferences. + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // FIXME: support big-endian. + if (DAG.getDataLayout().isBigEndian()) + return SDValue(); + + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + assert(!VT.isScalableVector() && "Encountered scalable BUILD_VECTOR?"); + + EVT OpIntVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + + if (!TLI.isTypeLegal(OpIntVT) || + (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::BITCAST, OpIntVT))) + return SDValue(); + + unsigned EltBitwidth = VT.getScalarSizeInBits(); + // NOTE: the actual width of operands may be wider than that! + + // Analyze all operands of this BUILD_VECTOR. What is the largest number of + // active bits they all have? We'll want to truncate them all to that width. + unsigned ActiveBits = 0; + APInt KnownZeroOps(VT.getVectorNumElements(), 0); + for (auto I : enumerate(N->ops())) { + SDValue Op = I.value(); + // FIXME: support UNDEF elements?
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) { + unsigned OpActiveBits = + Cst->getAPIntValue().trunc(EltBitwidth).getActiveBits(); + if (OpActiveBits == 0) { + KnownZeroOps.setBit(I.index()); + continue; + } + // Profitability check: don't allow non-zero constant operands. + return SDValue(); + } + // Profitability check: there must only be a single non-zero operand, + // and it must be the first operand of the BUILD_VECTOR. + if (I.index() != 0) + return SDValue(); + // The operand must be a zero-extension itself. + // FIXME: this could be generalized to known leading zeros check. + if (Op.getOpcode() != ISD::ZERO_EXTEND) + return SDValue(); + unsigned CurrActiveBits = + Op.getOperand(0).getValueSizeInBits().getFixedValue(); + assert(!ActiveBits && "Already encountered non-constant-zero operand?"); + ActiveBits = CurrActiveBits; + // We want to at least halve the element size. + if (2 * ActiveBits > EltBitwidth) + return SDValue(); + } + + // This BUILD_VECTOR must have at least one non-constant-zero operand. + if (ActiveBits == 0) + return SDValue(); + + // We have EltBitwidth bits, the *minimal* chunk size is ActiveBits, + // into how many chunks can we split our element width? + EVT NewScalarIntVT, NewIntVT; + std::optional<unsigned> Factor; + // We can split the element into at least two chunks, but not into more + // than |_ EltBitwidth / ActiveBits _| chunks. Find a largest split factor + // for which the element width is a multiple of it, + // and the resulting types/operations on that chunk width are legal. + assert(2 * ActiveBits <= EltBitwidth && + "We know that half or less bits of the element are active."); + for (unsigned Scale = EltBitwidth / ActiveBits; Scale >= 2; --Scale) { + if (EltBitwidth % Scale != 0) + continue; + unsigned ChunkBitwidth = EltBitwidth / Scale; + assert(ChunkBitwidth >= ActiveBits && "As per starting point."); + NewScalarIntVT = EVT::getIntegerVT(*DAG.getContext(), ChunkBitwidth); + NewIntVT = EVT::getVectorVT(*DAG.getContext(), NewScalarIntVT, + Scale * N->getNumOperands()); + if (!TLI.isTypeLegal(NewScalarIntVT) || !TLI.isTypeLegal(NewIntVT) || + (LegalOperations && + !(TLI.isOperationLegalOrCustom(ISD::TRUNCATE, NewScalarIntVT) && + TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, NewIntVT)))) + continue; + Factor = Scale; + break; + } + if (!Factor) + return SDValue(); + + SDLoc DL(N); + SDValue ZeroOp = DAG.getConstant(0, DL, NewScalarIntVT); + + // Recreate the BUILD_VECTOR, with elements now being Factor times smaller. 
+ SmallVector<SDValue, 16> NewOps; + NewOps.reserve(NewIntVT.getVectorNumElements()); + for (auto I : enumerate(N->ops())) { + SDValue Op = I.value(); + assert(!Op.isUndef() && "FIXME: after allowing UNDEF's, handle them here."); + unsigned SrcOpIdx = I.index(); + if (KnownZeroOps[SrcOpIdx]) { + NewOps.append(*Factor, ZeroOp); + continue; + } + Op = DAG.getBitcast(OpIntVT, Op); + Op = DAG.getNode(ISD::TRUNCATE, DL, NewScalarIntVT, Op); + NewOps.emplace_back(Op); + NewOps.append(*Factor - 1, ZeroOp); + } + assert(NewOps.size() == NewIntVT.getVectorNumElements()); + SDValue NewBV = DAG.getBuildVector(NewIntVT, DL, NewOps); + NewBV = DAG.getBitcast(VT, NewBV); + return NewBV; +} + SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { EVT VT = N->getValueType(0); @@ -20818,6 +21889,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { if (SDValue V = convertBuildVecZextToZext(N)) return V; + if (SDValue V = convertBuildVecZextToBuildVecWithZeros(N)) + return V; + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) return V; @@ -21078,6 +22152,109 @@ static SDValue combineConcatVectorOfCasts(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(CastOpcode, DL, VT, NewConcat); } +// See if this is a simple CONCAT_VECTORS with no UNDEF operands, and if one of +// the operands is a SHUFFLE_VECTOR, and all other operands are also operands +// to that SHUFFLE_VECTOR, create wider SHUFFLE_VECTOR. +static SDValue combineConcatVectorOfShuffleAndItsOperands( + SDNode *N, SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, + bool LegalOperations) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + if (VT.isScalableVector()) + return SDValue(); + + // For now, only allow simple 2-operand concatenations. + if (N->getNumOperands() != 2) + return SDValue(); + + // Don't create illegal types/shuffles when not allowed to. + if ((LegalTypes && !TLI.isTypeLegal(VT)) || + (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) + return SDValue(); + + // Analyze all of the operands of the CONCAT_VECTORS. Out of all of them, + // we want to find one that is: (1) a SHUFFLE_VECTOR (2) only used by us, + // and (3) all operands of CONCAT_VECTORS must be either that SHUFFLE_VECTOR, + // or one of the operands of that SHUFFLE_VECTOR (but not UNDEF!). + // (4) and for now, the SHUFFLE_VECTOR must be unary. + ShuffleVectorSDNode *SVN = nullptr; + for (SDValue Op : N->ops()) { + if (auto *CurSVN = dyn_cast<ShuffleVectorSDNode>(Op); + CurSVN && CurSVN->getOperand(1).isUndef() && N->isOnlyUserOf(CurSVN) && + all_of(N->ops(), [CurSVN](SDValue Op) { + // FIXME: can we allow UNDEF operands? + return !Op.isUndef() && + (Op.getNode() == CurSVN || is_contained(CurSVN->ops(), Op)); + })) { + SVN = CurSVN; + break; + } + } + if (!SVN) + return SDValue(); + + // We are going to pad the shuffle operands, so any indice, that was picking + // from the second operand, must be adjusted. + SmallVector<int, 16> AdjustedMask; + AdjustedMask.reserve(SVN->getMask().size()); + assert(SVN->getOperand(1).isUndef() && "Expected unary shuffle!"); + append_range(AdjustedMask, SVN->getMask()); + + // Identity masks for the operands of the (padded) shuffle. 
+ SmallVector<int, 32> IdentityMask(2 * OpVT.getVectorNumElements()); + MutableArrayRef<int> FirstShufOpIdentityMask = + MutableArrayRef<int>(IdentityMask) + .take_front(OpVT.getVectorNumElements()); + MutableArrayRef<int> SecondShufOpIdentityMask = + MutableArrayRef<int>(IdentityMask).take_back(OpVT.getVectorNumElements()); + std::iota(FirstShufOpIdentityMask.begin(), FirstShufOpIdentityMask.end(), 0); + std::iota(SecondShufOpIdentityMask.begin(), SecondShufOpIdentityMask.end(), + VT.getVectorNumElements()); + + // New combined shuffle mask. + SmallVector<int, 32> Mask; + Mask.reserve(VT.getVectorNumElements()); + for (SDValue Op : N->ops()) { + assert(!Op.isUndef() && "Not expecting to concatenate UNDEF."); + if (Op.getNode() == SVN) { + append_range(Mask, AdjustedMask); + continue; + } + if (Op == SVN->getOperand(0)) { + append_range(Mask, FirstShufOpIdentityMask); + continue; + } + if (Op == SVN->getOperand(1)) { + append_range(Mask, SecondShufOpIdentityMask); + continue; + } + llvm_unreachable("Unexpected operand!"); + } + + // Don't create illegal shuffle masks. + if (!TLI.isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + // Pad the shuffle operands with UNDEF. + SDLoc dl(N); + std::array<SDValue, 2> ShufOps; + for (auto I : zip(SVN->ops(), ShufOps)) { + SDValue ShufOp = std::get<0>(I); + SDValue &NewShufOp = std::get<1>(I); + if (ShufOp.isUndef()) + NewShufOp = DAG.getUNDEF(VT); + else { + SmallVector<SDValue, 2> ShufOpParts(N->getNumOperands(), + DAG.getUNDEF(OpVT)); + ShufOpParts[0] = ShufOp; + NewShufOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, ShufOpParts); + } + } + // Finally, create the new wide shuffle. + return DAG.getVectorShuffle(VT, dl, ShufOps[0], ShufOps[1], Mask); +} + SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // If we only have one input vector, we don't need to do any concatenation. if (N->getNumOperands() == 1) @@ -21213,6 +22390,10 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { if (SDValue V = combineConcatVectorOfCasts(N, DAG)) return V; + if (SDValue V = combineConcatVectorOfShuffleAndItsOperands( + N, DAG, TLI, LegalTypes, LegalOperations)) + return V; + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. Scan the CONCAT_VECTOR // operands and look for a CONCAT operations that place the incoming vectors @@ -21490,7 +22671,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) { MachinePointerInfo(Ld->getPointerInfo().getAddrSpace()); MMO = MF.getMachineMemOperand(Ld->getMemOperand(), MPI, StoreSize); } else - MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedSize(), + MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset.getFixedValue(), StoreSize); SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO); @@ -22050,14 +23231,53 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN, return DAG.getBuildVector(VT, SDLoc(SVN), Ops); } +// Match shuffles that can be converted to *_vector_extend_in_reg. +// This is often generated during legalization. +// e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)), +// and returns the EVT to which the extension should be performed. +// NOTE: this assumes that the src is the first operand of the shuffle. 
+static std::optional<EVT> canCombineShuffleToExtendVectorInreg( + unsigned Opcode, EVT VT, std::function<bool(unsigned)> Match, + SelectionDAG &DAG, const TargetLowering &TLI, bool LegalTypes, + bool LegalOperations) { + bool IsBigEndian = DAG.getDataLayout().isBigEndian(); + + // TODO Add support for big-endian when we have a test case. + if (!VT.isInteger() || IsBigEndian) + return std::nullopt; + + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for + // power-of-2 extensions as they are the most likely. + // FIXME: should try Scale == NumElts case too, + for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { + // The vector width must be a multiple of Scale. + if (NumElts % Scale != 0) + continue; + + EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); + + if ((LegalTypes && !TLI.isTypeLegal(OutVT)) || + (LegalOperations && !TLI.isOperationLegalOrCustom(Opcode, OutVT))) + continue; + + if (Match(Scale)) + return OutVT; + } + + return std::nullopt; +} + // Match shuffles that can be converted to any_vector_extend_in_reg. // This is often generated during legalization. // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src)) -// TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case. -static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, - SelectionDAG &DAG, - const TargetLowering &TLI, - bool LegalOperations) { +static SDValue combineShuffleToAnyExtendVectorInreg(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { EVT VT = SVN->getValueType(0); bool IsBigEndian = DAG.getDataLayout().isBigEndian(); @@ -22065,13 +23285,9 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, if (!VT.isInteger() || IsBigEndian) return SDValue(); - unsigned NumElts = VT.getVectorNumElements(); - unsigned EltSizeInBits = VT.getScalarSizeInBits(); - ArrayRef<int> Mask = SVN->getMask(); - SDValue N0 = SVN->getOperand(0); - // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32)) - auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) { + auto isAnyExtend = [NumElts = VT.getVectorNumElements(), + Mask = SVN->getMask()](unsigned Scale) { for (unsigned i = 0; i != NumElts; ++i) { if (Mask[i] < 0) continue; @@ -22082,27 +23298,138 @@ static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN, return true; }; - // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for - // power-of-2 extensions as they are the most likely. - for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) { - // Check for non power of 2 vector sizes - if (NumElts % Scale != 0) - continue; - if (!isAnyExtend(Scale)) - continue; + unsigned Opcode = ISD::ANY_EXTEND_VECTOR_INREG; + SDValue N0 = SVN->getOperand(0); + // Never create an illegal type. Only create unsupported operations if we + // are pre-legalization. + std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg( + Opcode, VT, isAnyExtend, DAG, TLI, /*LegalTypes=*/true, LegalOperations); + if (!OutVT) + return SDValue(); + return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, N0)); +} - EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale); - EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale); - // Never create an illegal type. 
Only create unsupported operations if we - are pre-legalization. - if (TLI.isTypeLegal(OutVT)) - if (!LegalOperations || - TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT)) - return DAG.getBitcast(VT, - DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, - SDLoc(SVN), OutVT, N0)); - } +// Match shuffles that can be converted to zero_extend_vector_inreg. +// This is often generated during legalization. +// e.g. v4i32 <0,z,1,u> -> (v2i64 zero_extend_vector_inreg(v4i32 src)) +static SDValue combineShuffleToZeroExtendVectorInReg(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const TargetLowering &TLI, + bool LegalOperations) { + bool LegalTypes = true; + EVT VT = SVN->getValueType(0); + assert(!VT.isScalableVector() && "Encountered scalable shuffle?"); + unsigned NumElts = VT.getVectorNumElements(); + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + + // TODO: add support for big-endian when we have a test case. + bool IsBigEndian = DAG.getDataLayout().isBigEndian(); + if (!VT.isInteger() || IsBigEndian) + return SDValue(); + SmallVector<int, 16> Mask(SVN->getMask().begin(), SVN->getMask().end()); + auto ForEachDecomposedIndice = [NumElts, &Mask](auto Fn) { + for (int &Indice : Mask) { + if (Indice < 0) + continue; + int OpIdx = (unsigned)Indice < NumElts ? 0 : 1; + int OpEltIdx = (unsigned)Indice < NumElts ? Indice : Indice - NumElts; + Fn(Indice, OpIdx, OpEltIdx); + } + }; + + // Which elements of which operand does this shuffle demand? + std::array<APInt, 2> OpsDemandedElts; + for (APInt &OpDemandedElts : OpsDemandedElts) + OpDemandedElts = APInt::getZero(NumElts); + ForEachDecomposedIndice( + [&OpsDemandedElts](int &Indice, int OpIdx, int OpEltIdx) { + OpsDemandedElts[OpIdx].setBit(OpEltIdx); + }); + + // Element-wise(!), which of these demanded elements are known to be zero? + std::array<APInt, 2> OpsKnownZeroElts; + for (auto I : zip(SVN->ops(), OpsDemandedElts, OpsKnownZeroElts)) + std::get<2>(I) = + DAG.computeVectorKnownZeroElements(std::get<0>(I), std::get<1>(I)); + + // Manifest zeroable element knowledge in the shuffle mask. + // NOTE: we don't have a 'zeroable' sentinel value in the generic DAG, + // this is a local invention, but it won't leak into DAG. + // FIXME: should we not manifest them, but just check when matching? + bool HadZeroableElts = false; + ForEachDecomposedIndice([&OpsKnownZeroElts, &HadZeroableElts]( + int &Indice, int OpIdx, int OpEltIdx) { + if (OpsKnownZeroElts[OpIdx][OpEltIdx]) { + Indice = -2; // Zeroable element. + HadZeroableElts = true; + } + }); + + // Don't proceed unless we've refined at least one zeroable mask index. + // If we didn't, then we are still trying to match the same shuffle mask + // we previously tried to match as ISD::ANY_EXTEND_VECTOR_INREG, + // and evidently failed. Proceeding will lead to endless combine loops. + if (!HadZeroableElts) + return SDValue(); + + // The shuffle may be more fine-grained than we want. Widen elements first. + // FIXME: should we do this before manifesting zeroable shuffle mask indices?
+ SmallVector<int, 16> ScaledMask; + getShuffleMaskWithWidestElts(Mask, ScaledMask); + assert(Mask.size() >= ScaledMask.size() && + Mask.size() % ScaledMask.size() == 0 && "Unexpected mask widening."); + int Prescale = Mask.size() / ScaledMask.size(); + + NumElts = ScaledMask.size(); + EltSizeInBits *= Prescale; + + EVT PrescaledVT = EVT::getVectorVT( + *DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits), + NumElts); + + if (LegalTypes && !TLI.isTypeLegal(PrescaledVT) && TLI.isTypeLegal(VT)) + return SDValue(); + + // For example, + // shuffle<0,z,1,-1> == (v2i64 zero_extend_vector_inreg(v4i32)) + // But not shuffle<z,z,1,-1> and not shuffle<0,z,z,-1> ! (for same types) + auto isZeroExtend = [NumElts, &ScaledMask](unsigned Scale) { + assert(Scale >= 2 && Scale <= NumElts && NumElts % Scale == 0 && + "Unexpected mask scaling factor."); + ArrayRef<int> Mask = ScaledMask; + for (unsigned SrcElt = 0, NumSrcElts = NumElts / Scale; + SrcElt != NumSrcElts; ++SrcElt) { + // Analyze the shuffle mask in Scale-sized chunks. + ArrayRef<int> MaskChunk = Mask.take_front(Scale); + assert(MaskChunk.size() == Scale && "Unexpected mask size."); + Mask = Mask.drop_front(MaskChunk.size()); + // The first index in this chunk must be SrcElt, but not zero! + // FIXME: undef should be fine, but that results in more-defined result. + if (int FirstIndice = MaskChunk[0]; (unsigned)FirstIndice != SrcElt) + return false; + // The rest of the indices in this chunk must be zeros. + // FIXME: undef should be fine, but that results in more-defined result. + if (!all_of(MaskChunk.drop_front(1), + [](int Indice) { return Indice == -2; })) + return false; + } + assert(Mask.empty() && "Did not process the whole mask?"); + return true; + }; + + unsigned Opcode = ISD::ZERO_EXTEND_VECTOR_INREG; + for (bool Commuted : {false, true}) { + SDValue Op = SVN->getOperand(!Commuted ? 0 : 1); + if (Commuted) + ShuffleVectorSDNode::commuteMask(ScaledMask); + std::optional<EVT> OutVT = canCombineShuffleToExtendVectorInreg( + Opcode, PrescaledVT, isZeroExtend, DAG, TLI, LegalTypes, + LegalOperations); + if (OutVT) + return DAG.getBitcast(VT, DAG.getNode(Opcode, SDLoc(SVN), *OutVT, + DAG.getBitcast(PrescaledVT, Op))); + } return SDValue(); } @@ -22174,9 +23501,52 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN, // the masks of the shuffles. static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { + EVT VT = Shuf->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + if (!Shuf->getOperand(1).isUndef()) return SDValue(); + // See if this unary non-splat shuffle actually *is* a splat shuffle + // in disguise, with all demanded elements being identical. + // FIXME: this can be done per-operand. + if (!Shuf->isSplat()) { + APInt DemandedElts(NumElts, 0); + for (int Idx : Shuf->getMask()) { + if (Idx < 0) + continue; // Ignore sentinel indices. + assert((unsigned)Idx < NumElts && "Out-of-bounds shuffle index?"); + DemandedElts.setBit(Idx); + } + assert(DemandedElts.countPopulation() > 1 && "Is a splat shuffle already?"); + APInt UndefElts; + if (DAG.isSplatValue(Shuf->getOperand(0), DemandedElts, UndefElts)) { + // Even if all demanded elements are splat, some of them could be undef. + // Which lowest demanded element is *not* known-undef? + std::optional<unsigned> MinNonUndefIdx; + for (int Idx : Shuf->getMask()) { + if (Idx < 0 || UndefElts[Idx]) + continue; // Ignore sentinel indices, and undef elements.
+ MinNonUndefIdx = std::min<unsigned>(Idx, MinNonUndefIdx.value_or(~0U)); + } + if (!MinNonUndefIdx) + return DAG.getUNDEF(VT); // All undef - result is undef. + assert(*MinNonUndefIdx < NumElts && "Expected valid element index."); + SmallVector<int, 8> SplatMask(Shuf->getMask().begin(), + Shuf->getMask().end()); + for (int &Idx : SplatMask) { + if (Idx < 0) + continue; // Passthrough sentinel indices. + // Otherwise, just pick the lowest demanded non-undef element. + // Or sentinel undef, if we know we'd pick a known-undef element. + Idx = UndefElts[Idx] ? -1 : *MinNonUndefIdx; + } + assert(SplatMask != Shuf->getMask() && "Expected mask to change!"); + return DAG.getVectorShuffle(VT, SDLoc(Shuf), Shuf->getOperand(0), + Shuf->getOperand(1), SplatMask); + } + } + // If the inner operand is a known splat with no undefs, just return that directly. // TODO: Create DemandedElts mask from Shuf's mask. // TODO: Allow undef elements and merge with the shuffle code below. @@ -22360,7 +23730,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf, // First, check if we are taking one element of a vector and shuffling that // element into another vector. ArrayRef<int> Mask = Shuf->getMask(); - SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end()); + SmallVector<int, 16> CommutedMask(Mask); SDValue Op0 = Shuf->getOperand(0); SDValue Op1 = Shuf->getOperand(1); int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask); @@ -22514,6 +23884,23 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (auto *Idx = dyn_cast<ConstantSDNode>(N0.getOperand(2))) if (Idx->getAPIntValue() == SplatIndex) return DAG.getSplatBuildVector(VT, SDLoc(N), N0.getOperand(1)); + + // Look through a bitcast if LE and splatting lane 0, through to a + // scalar_to_vector or a build_vector. + if (N0.getOpcode() == ISD::BITCAST && N0.getOperand(0).hasOneUse() && + SplatIndex == 0 && DAG.getDataLayout().isLittleEndian() && + (N0.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR || + N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR)) { + EVT N00VT = N0.getOperand(0).getValueType(); + if (VT.getScalarSizeInBits() <= N00VT.getScalarSizeInBits() && + VT.isInteger() && N00VT.isInteger()) { + EVT InVT = + TLI.getTypeToTransformTo(*DAG.getContext(), VT.getScalarType()); + SDValue Op = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), + SDLoc(N), InVT); + return DAG.getSplatBuildVector(VT, SDLoc(N), Op); + } + } } // If this is a bit convert that changes the element type of the vector but @@ -22574,7 +23961,8 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return ShufOp; // Match shuffles that can be converted to any_vector_extend_in_reg. - if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations)) + if (SDValue V = + combineShuffleToAnyExtendVectorInreg(SVN, DAG, TLI, LegalOperations)) return V; // Combine "truncate_vector_in_reg" style shuffles. 
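A hedged standalone sketch of the chunk-wise mask test used by the zero_extend_vector_inreg matching above, re-expressed over plain integers: the -2 "zeroable" sentinel follows the local convention the patch introduces, and the test masks are made-up examples rather than anything taken from the LLVM sources.

#include <cassert>
#include <vector>

// Returns true if Mask acts as a zero-extension by Scale: every Scale-sized
// chunk starts with the next source element and pads with known-zero lanes.
static bool isZeroExtendMask(const std::vector<int> &Mask, unsigned Scale) {
  unsigned NumElts = Mask.size();
  if (Scale < 2 || NumElts % Scale != 0)
    return false;
  for (unsigned SrcElt = 0; SrcElt != NumElts / Scale; ++SrcElt) {
    if (Mask[SrcElt * Scale] != static_cast<int>(SrcElt))
      return false; // First lane of the chunk must be the source element.
    for (unsigned I = 1; I != Scale; ++I)
      if (Mask[SrcElt * Scale + I] != -2)
        return false; // Remaining lanes must be proven zero.
  }
  return true;
}

int main() {
  // shuffle<0,z,1,z> of a v4i32 acts as (v2i64 zero_extend_vector_inreg v4i32).
  assert(isZeroExtendMask({0, -2, 1, -2}, 2));
  // shuffle<z,z,1,z> does not: the first lane of the first chunk is not elt 0.
  assert(!isZeroExtendMask({-2, -2, 1, -2}, 2));
  return 0;
}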
@@ -22671,7 +24059,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue InsertN1 = ShuffleToInsert(N0, N1, Mask)) return InsertN1; if (N0.getOpcode() == ISD::CONCAT_VECTORS) { - SmallVector<int> CommuteMask(Mask.begin(), Mask.end()); + SmallVector<int> CommuteMask(Mask); ShuffleVectorSDNode::commuteMask(CommuteMask); if (SDValue InsertN0 = ShuffleToInsert(N1, N0, CommuteMask)) return InsertN0; @@ -22707,24 +24095,31 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { SDLoc DL(N); EVT IntVT = VT.changeVectorElementTypeToInteger(); EVT IntSVT = VT.getVectorElementType().changeTypeToInteger(); - SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); - SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); - SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT)); - for (int I = 0; I != (int)NumElts; ++I) - if (0 <= Mask[I]) - AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt; - - // See if a clear mask is legal instead of going via - // XformToShuffleWithZero which loses UNDEF mask elements. - if (TLI.isVectorClearMaskLegal(ClearMask, IntVT)) - return DAG.getBitcast( - VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0), - DAG.getConstant(0, DL, IntVT), ClearMask)); + // Transform the type to a legal type so that the buildvector constant + // elements are not illegal. Make sure that the result is larger than the + // original type, in case the value is split into two (e.g. i64->i32). + if (!TLI.isTypeLegal(IntSVT) && LegalTypes) + IntSVT = TLI.getTypeToTransformTo(*DAG.getContext(), IntSVT); + if (IntSVT.getSizeInBits() >= IntVT.getScalarSizeInBits()) { + SDValue ZeroElt = DAG.getConstant(0, DL, IntSVT); + SDValue AllOnesElt = DAG.getAllOnesConstant(DL, IntSVT); + SmallVector<SDValue, 16> AndMask(NumElts, DAG.getUNDEF(IntSVT)); + for (int I = 0; I != (int)NumElts; ++I) + if (0 <= Mask[I]) + AndMask[I] = Mask[I] == I ? AllOnesElt : ZeroElt; + + // See if a clear mask is legal instead of going via + // XformToShuffleWithZero which loses UNDEF mask elements. + if (TLI.isVectorClearMaskLegal(ClearMask, IntVT)) + return DAG.getBitcast( + VT, DAG.getVectorShuffle(IntVT, DL, DAG.getBitcast(IntVT, N0), + DAG.getConstant(0, DL, IntVT), ClearMask)); - if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) - return DAG.getBitcast( - VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0), - DAG.getBuildVector(IntVT, DL, AndMask))); + if (TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) + return DAG.getBitcast( + VT, DAG.getNode(ISD::AND, DL, IntVT, DAG.getBitcast(IntVT, N0), + DAG.getBuildVector(IntVT, DL, AndMask))); + } } } @@ -23053,55 +24448,101 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG)) return V; + // Match shuffles that can be converted to ISD::ZERO_EXTEND_VECTOR_INREG. + // Perform this really late, because it could eliminate knowledge + // of undef elements created by this shuffle. + if (Level < AfterLegalizeTypes) + if (SDValue V = combineShuffleToZeroExtendVectorInReg(SVN, DAG, TLI, + LegalOperations)) + return V; + return SDValue(); } SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { - SDValue InVal = N->getOperand(0); EVT VT = N->getValueType(0); + if (!VT.isFixedLengthVector()) + return SDValue(); + + // Try to convert a scalar binop with an extracted vector element to a vector + // binop. This is intended to reduce potentially expensive register moves. + // TODO: Check if both operands are extracted. + // TODO: Generalize this, so it can be called from visitINSERT_VECTOR_ELT().
+ SDValue Scalar = N->getOperand(0); + unsigned Opcode = Scalar.getOpcode(); + EVT VecEltVT = VT.getScalarType(); + if (Scalar.hasOneUse() && Scalar->getNumValues() == 1 && + TLI.isBinOp(Opcode) && Scalar.getValueType() == VecEltVT && + Scalar.getOperand(0).getValueType() == VecEltVT && + Scalar.getOperand(1).getValueType() == VecEltVT && + DAG.isSafeToSpeculativelyExecute(Opcode) && hasOperation(Opcode, VT)) { + // Match an extract element and get a shuffle mask equivalent. + SmallVector<int, 8> ShufMask(VT.getVectorNumElements(), -1); + + for (int i : {0, 1}) { + // s2v (bo (extelt V, Idx), C) --> shuffle (bo V, C'), {Idx, -1, -1...} + // s2v (bo C, (extelt V, Idx)) --> shuffle (bo C', V), {Idx, -1, -1...} + SDValue EE = Scalar.getOperand(i); + auto *C = dyn_cast<ConstantSDNode>(Scalar.getOperand(i ? 0 : 1)); + if (C && EE.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + EE.getOperand(0).getValueType() == VT && + isa<ConstantSDNode>(EE.getOperand(1))) { + // Mask = {ExtractIndex, undef, undef....} + ShufMask[0] = EE.getConstantOperandVal(1); + // Make sure the shuffle is legal if we are crossing lanes. + if (TLI.isShuffleMaskLegal(ShufMask, VT)) { + SDLoc DL(N); + SDValue V[] = {EE.getOperand(0), + DAG.getConstant(C->getAPIntValue(), DL, VT)}; + SDValue VecBO = DAG.getNode(Opcode, DL, VT, V[i], V[1 - i]); + return DAG.getVectorShuffle(VT, DL, VecBO, DAG.getUNDEF(VT), + ShufMask); + } + } + } + } // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern // with a VECTOR_SHUFFLE and possible truncate. - if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT && - VT.isFixedLengthVector() && - InVal->getOperand(0).getValueType().isFixedLengthVector()) { - SDValue InVec = InVal->getOperand(0); - SDValue EltNo = InVal->getOperand(1); - auto InVecT = InVec.getValueType(); - if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) { - SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1); - int Elt = C0->getZExtValue(); - NewMask[0] = Elt; - // If we have an implict truncate do truncate here as long as it's legal. - // if it's not legal, this should - if (VT.getScalarType() != InVal.getValueType() && - InVal.getValueType().isScalarInteger() && - isTypeLegal(VT.getScalarType())) { - SDValue Val = - DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); - } - if (VT.getScalarType() == InVecT.getScalarType() && - VT.getVectorNumElements() <= InVecT.getVectorNumElements()) { - SDValue LegalShuffle = - TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec, - DAG.getUNDEF(InVecT), NewMask, DAG); - if (LegalShuffle) { - // If the initial vector is the correct size this shuffle is a - // valid result. - if (VT == InVecT) - return LegalShuffle; - // If not we must truncate the vector. - if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) { - SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N)); - EVT SubVT = EVT::getVectorVT(*DAG.getContext(), - InVecT.getVectorElementType(), - VT.getVectorNumElements()); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, - LegalShuffle, ZeroIdx); - } - } - } + if (Opcode != ISD::EXTRACT_VECTOR_ELT || + !Scalar.getOperand(0).getValueType().isFixedLengthVector()) + return SDValue(); + + // If we have an implicit truncate, truncate here if it is legal. 
+ if (VecEltVT != Scalar.getValueType() && + Scalar.getValueType().isScalarInteger() && isTypeLegal(VecEltVT)) { + SDValue Val = DAG.getNode(ISD::TRUNCATE, SDLoc(Scalar), VecEltVT, Scalar); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val); + } + + auto *ExtIndexC = dyn_cast<ConstantSDNode>(Scalar.getOperand(1)); + if (!ExtIndexC) + return SDValue(); + + SDValue SrcVec = Scalar.getOperand(0); + EVT SrcVT = SrcVec.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); + unsigned VTNumElts = VT.getVectorNumElements(); + if (VecEltVT == SrcVT.getScalarType() && VTNumElts <= SrcNumElts) { + // Create a shuffle equivalent for scalar-to-vector: {ExtIndex, -1, -1, ...} + SmallVector<int, 8> Mask(SrcNumElts, -1); + Mask[0] = ExtIndexC->getZExtValue(); + SDValue LegalShuffle = TLI.buildLegalVectorShuffle( + SrcVT, SDLoc(N), SrcVec, DAG.getUNDEF(SrcVT), Mask, DAG); + if (!LegalShuffle) + return SDValue(); + + // If the initial vector is the same size, the shuffle is the result. + if (VT == SrcVT) + return LegalShuffle; + + // If not, shorten the shuffled vector. + if (VTNumElts != SrcNumElts) { + SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N)); + EVT SubVT = EVT::getVectorVT(*DAG.getContext(), + SrcVT.getVectorElementType(), VTNumElts); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, LegalShuffle, + ZeroIdx); } } @@ -23331,6 +24772,15 @@ SDValue DAGCombiner::visitVECREDUCE(SDNode *N) { } SDValue DAGCombiner::visitVPOp(SDNode *N) { + + if (N->getOpcode() == ISD::VP_GATHER) + if (SDValue SD = visitVPGATHER(N)) + return SD; + + if (N->getOpcode() == ISD::VP_SCATTER) + if (SDValue SD = visitVPSCATTER(N)) + return SD; + // VP operations in which all vector elements are disabled - either by // determining that the mask is all false or that the EVL is 0 - can be // eliminated. @@ -23499,10 +24949,40 @@ static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG, } // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index - if (VT.isScalableVector()) - return DAG.getSplatVector(VT, DL, ScalarBO); - SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); - return DAG.getBuildVector(VT, DL, Ops); + return DAG.getSplat(VT, DL, ScalarBO); +} + +/// Visit a vector cast operation, like FP_EXTEND. +SDValue DAGCombiner::SimplifyVCastOp(SDNode *N, const SDLoc &DL) { + EVT VT = N->getValueType(0); + assert(VT.isVector() && "SimplifyVCastOp only works on vectors!"); + EVT EltVT = VT.getVectorElementType(); + unsigned Opcode = N->getOpcode(); + + SDValue N0 = N->getOperand(0); + EVT SrcVT = N0->getValueType(0); + EVT SrcEltVT = SrcVT.getVectorElementType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // TODO: promote operation might be also good here? + int Index0; + SDValue Src0 = DAG.getSplatSourceVector(N0, Index0); + if (Src0 && + (N0.getOpcode() == ISD::SPLAT_VECTOR || + TLI.isExtractVecEltCheap(VT, Index0)) && + TLI.isOperationLegalOrCustom(Opcode, EltVT) && + TLI.preferScalarizeSplat(Opcode)) { + SDValue IndexC = DAG.getVectorIdxConstant(Index0, DL); + SDValue Elt = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcEltVT, Src0, IndexC); + SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, Elt, N->getFlags()); + if (VT.isScalableVector()) + return DAG.getSplatVector(VT, DL, ScalarBO); + SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO); + return DAG.getBuildVector(VT, DL, Ops); + } + + return SDValue(); } /// Visit a binary vector operation, like ADD. 
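A hedged standalone sketch of the lane-wise equivalence the scalar-to-vector binop rewrite above relies on: applying the binop to the whole source vector and then shuffling the extracted lane into position 0 yields the same lane 0 as doing the scalar op first. Plain uint32_t addition stands in for the DAG binop, and all values are made-up examples rather than anything from the LLVM sources.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t V[4] = {10, 20, 30, 40};
  const uint32_t C = 7;
  const unsigned ExtractIdx = 2;

  // Scalar form: scalar_to_vector (add (extract_vector_elt V, 2), 7).
  uint32_t ScalarLane0 = V[ExtractIdx] + C;

  // Vector form: shuffle (add V, splat 7), undef, {2, -1, -1, -1}.
  uint32_t VecBO[4];
  for (unsigned I = 0; I != 4; ++I)
    VecBO[I] = V[I] + C;
  uint32_t ShuffledLane0 = VecBO[ExtractIdx]; // Mask[0] == ExtractIdx.

  assert(ScalarLane0 == ShuffledLane0);
  return 0;
}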
@@ -23522,9 +25002,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { // same types of operations that are in the original sequence. We do have to // restrict ops like integer div that have immediate UB (e.g., div-by-zero) // though. This code is adapted from the identical transform in instcombine. - if (Opcode != ISD::UDIV && Opcode != ISD::SDIV && - Opcode != ISD::UREM && Opcode != ISD::SREM && - Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) { + if (DAG.isSafeToSpeculativelyExecute(Opcode)) { auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS); auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS); if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) && @@ -23542,7 +25020,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { // demanded elements analysis. It is further limited to not change a splat // of an inserted scalar because that may be optimized better by // load-folding or other target-specific behaviors. - if (isConstOrConstSplat(RHS) && Shuf0 && is_splat(Shuf0->getMask()) && + if (isConstOrConstSplat(RHS) && Shuf0 && all_equal(Shuf0->getMask()) && Shuf0->hasOneUse() && Shuf0->getOperand(1).isUndef() && Shuf0->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat X), (splat C) --> splat (binop X, C) @@ -23551,7 +25029,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { return DAG.getVectorShuffle(VT, DL, NewBinOp, DAG.getUNDEF(VT), Shuf0->getMask()); } - if (isConstOrConstSplat(LHS) && Shuf1 && is_splat(Shuf1->getMask()) && + if (isConstOrConstSplat(LHS) && Shuf1 && all_equal(Shuf1->getMask()) && Shuf1->hasOneUse() && Shuf1->getOperand(1).isUndef() && Shuf1->getOperand(0).getOpcode() != ISD::INSERT_VECTOR_ELT) { // binop (splat C), (splat X) --> splat (binop C, X) @@ -23624,7 +25102,8 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N, const SDLoc &DL) { SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2) { - assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + assert(N0.getOpcode() == ISD::SETCC && + "First argument must be a SetCC node!"); SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, cast<CondCodeSDNode>(N0.getOperand(2))->get()); @@ -24099,7 +25578,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC)) return V; - // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) + // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (sra (shl x)) A) // where y has a single bit set. // A plaintext description would be: we can turn the SELECT_CC into an AND // when the condition can be materialized as an all-ones register.
Any @@ -24550,7 +26029,7 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { bool IsAtomic; SDValue BasePtr; int64_t Offset; - Optional<int64_t> NumBytes; + std::optional<int64_t> NumBytes; MachineMemOperand *MMO; }; @@ -24565,21 +26044,26 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const { : 0; uint64_t Size = MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize()); - return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(), + return {LSN->isVolatile(), + LSN->isAtomic(), + LSN->getBasePtr(), Offset /*base offset*/, - Optional<int64_t>(Size), + std::optional<int64_t>(Size), LSN->getMemOperand()}; } if (const auto *LN = cast<LifetimeSDNode>(N)) - return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1), + return {false /*isVolatile*/, + /*isAtomic*/ false, + LN->getOperand(1), (LN->hasOffset()) ? LN->getOffset() : 0, - (LN->hasOffset()) ? Optional<int64_t>(LN->getSize()) - : Optional<int64_t>(), + (LN->hasOffset()) ? std::optional<int64_t>(LN->getSize()) + : std::optional<int64_t>(), (MachineMemOperand *)nullptr}; // Default. - return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(), - (int64_t)0 /*offset*/, - Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr}; + return {false /*isvolatile*/, + /*isAtomic*/ false, SDValue(), + (int64_t)0 /*offset*/, std::optional<int64_t>() /*size*/, + (MachineMemOperand *)nullptr}; }; MemUseCharacteristics MUC0 = getCharacteristics(Op0), @@ -24806,13 +26290,6 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { return DAG.getTokenFactor(SDLoc(N), Aliases); } -namespace { -// TODO: Replace with with std::monostate when we move to C++17. -struct UnitT { } Unit; -bool operator==(const UnitT &, const UnitT &) { return true; } -bool operator!=(const UnitT &, const UnitT &) { return false; } -} // namespace - // This function tries to collect a bunch of potentially interesting // nodes to improve the chains of, all at once. This might seem // redundant, as this function gets called when visiting every store @@ -24833,8 +26310,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // the common case, every store writes to the immediately previous address // space and thus merged with the previous interval at insertion time. - using IMap = - llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>; + using IMap = llvm::IntervalMap<int64_t, std::monostate, 8, + IntervalMapHalfOpenInfo<int64_t>>; IMap::Allocator A; IMap Intervals(A); @@ -24861,7 +26338,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { return false; // Add ST's interval. - Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit); + Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, + std::monostate{}); while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) { if (Chain->getMemoryVT().isScalableVector()) @@ -24890,7 +26368,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) { // If there's a previous interval, we should start after it. 
if (I != Intervals.begin() && (--I).stop() <= Offset) break; - Intervals.insert(Offset, Offset + Length, Unit); + Intervals.insert(Offset, Offset + Length, std::monostate{}); ChainedStores.push_back(Chain); STChain = Chain; diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index ff5779967e22..2f2ae6e29855 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -42,7 +42,6 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APSInt.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" @@ -105,6 +104,7 @@ #include <cassert> #include <cstdint> #include <iterator> +#include <optional> #include <utility> using namespace llvm; @@ -319,7 +319,7 @@ Register FastISel::materializeConstant(const Value *V, MVT VT) { Reg = lookUpRegForValue(Op); } else if (isa<UndefValue>(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } return Reg; @@ -405,11 +405,6 @@ void FastISel::recomputeInsertPt() { ++FuncInfo.InsertPt; } else FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); - - // Now skip past any EH_LABELs, which must remain at the beginning. - while (FuncInfo.InsertPt != FuncInfo.MBB->end() && - FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) - ++FuncInfo.InsertPt; } void FastISel::removeDeadCode(MachineBasicBlock::iterator I, @@ -696,20 +691,20 @@ bool FastISel::selectStackmap(const CallInst *I) { // Issue CALLSEQ_START unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); auto Builder = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)); const MCInstrDesc &MCID = Builder.getInstr()->getDesc(); for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I) Builder.addImm(0); // Issue STACKMAP. - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::STACKMAP)); for (auto const &MO : Ops) MIB.add(MO); // Issue CALLSEQ_END unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) .addImm(0) .addImm(0); @@ -878,7 +873,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) { /*isImp=*/true)); // Insert the patchpoint instruction before the call generated by the target. 
- MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, MIMD, TII.get(TargetOpcode::PATCHPOINT)); for (auto &MO : Ops) @@ -907,7 +902,7 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) { Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)), /*isDef=*/false)); MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::PATCHABLE_EVENT_CALL)); for (auto &MO : Ops) MIB.add(MO); @@ -928,7 +923,7 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) { Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)), /*isDef=*/false)); MachineInstrBuilder MIB = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL)); for (auto &MO : Ops) MIB.add(MO); @@ -1139,9 +1134,8 @@ bool FastISel::lowerCall(const CallInst *CI) { bool IsTailCall = CI->isTailCall(); if (IsTailCall && !isInTailCallPosition(*CI, TM)) IsTailCall = false; - if (IsTailCall && MF->getFunction() - .getFnAttribute("disable-tail-calls") - .getValueAsBool()) + if (IsTailCall && !CI->isMustTailCall() && + MF->getFunction().getFnAttribute("disable-tail-calls").getValueAsBool()) IsTailCall = false; CallLoweringInfo CLI; @@ -1171,7 +1165,7 @@ bool FastISel::selectCall(const User *I) { ExtraInfo |= InlineAsm::Extra_IsConvergent; ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; - MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::INLINEASM)); MIB.addExternalSymbol(IA->getAsmString().c_str()); MIB.addImm(ExtraInfo); @@ -1229,7 +1223,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (Arg && FuncInfo.getArgumentFrameIndex(Arg) != INT_MAX) return true; - Optional<MachineOperand> Op; + std::optional<MachineOperand> Op; if (Register Reg = lookUpRegForValue(Address)) Op = MachineOperand::CreateReg(Reg, false); @@ -1251,24 +1245,24 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { false); if (Op) { - assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) && "Expected inlined-at fields to agree"); - // A dbg.declare describes the address of a source variable, so lower it - // into an indirect DBG_VALUE. - auto Builder = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op, - DI->getVariable(), DI->getExpression()); - - // If using instruction referencing, mutate this into a DBG_INSTR_REF, - // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto - // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. - if (UseInstrRefDebugInfo && Op->isReg()) { - Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); - Builder->getOperand(1).ChangeToImmediate(0); - auto *NewExpr = - DIExpression::prepend(DI->getExpression(), DIExpression::DerefBefore); - Builder->getOperand(3).setMetadata(NewExpr); + if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) { + // If using instruction referencing, produce this as a DBG_INSTR_REF, + // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto + // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. 
+ SmallVector<uint64_t, 3> Ops( + {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_deref}); + auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), + TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, *Op, + DI->getVariable(), NewExpr); + } else { + // A dbg.declare describes the address of a source variable, so lower it + // into an indirect DBG_VALUE. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), + TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true, *Op, + DI->getVariable(), DI->getExpression()); } } else { // We can't yet handle anything else here because it would require @@ -1283,12 +1277,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { const DbgValueInst *DI = cast<DbgValueInst>(II); const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); - assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + assert(DI->getVariable()->isValidLocationForIntrinsic(MIMD.getDL()) && "Expected inlined-at fields to agree"); if (!V || isa<UndefValue>(V) || DI->hasArgList()) { // DI is either undef or cannot produce a valid DBG_VALUE, so produce an // undef DBG_VALUE to terminate any prior location. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, false, 0U, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, false, 0U, DI->getVariable(), DI->getExpression()); } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { // See if there's an expression to constant-fold. @@ -1296,35 +1290,42 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { if (Expr) std::tie(Expr, CI) = Expr->constantFold(CI); if (CI->getBitWidth() > 64) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addCImm(CI) .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(Expr); else - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addImm(CI->getZExtValue()) .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(Expr); } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addFPImm(CF) .addImm(0U) .addMetadata(DI->getVariable()) .addMetadata(DI->getExpression()); } else if (Register Reg = lookUpRegForValue(V)) { // FIXME: This does not handle register-indirect values at offset 0. - bool IsIndirect = false; - auto Builder = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, - DI->getVariable(), DI->getExpression()); - - // If using instruction referencing, mutate this into a DBG_INSTR_REF, - // to be later patched up by finalizeDebugInstrRefs. - if (UseInstrRefDebugInfo) { - Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); - Builder->getOperand(1).ChangeToImmediate(0); + if (!FuncInfo.MF->useDebugInstrRef()) { + bool IsIndirect = false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), II, IsIndirect, + Reg, DI->getVariable(), DI->getExpression()); + } else { + // If using instruction referencing, produce this as a DBG_INSTR_REF, + // to be later patched up by finalizeDebugInstrRefs. 
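// Illustrative sketch, not part of this patch (helper name assumed): the
// expression rewrite both branches use. DW_OP_LLVM_arg 0 selects the first
// location operand of the DBG_INSTR_REF; DW_OP_deref is appended only for the
// dbg.declare case above, since DBG_INSTR_REF has no "indirect" flag. The
// dbg.value case below uses the same prefix without the deref.
#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"

static const llvm::DIExpression *
buildInstrRefExpr(const llvm::DIExpression *Expr, bool Indirect) {
  llvm::SmallVector<uint64_t, 3> Ops = {llvm::dwarf::DW_OP_LLVM_arg, 0};
  if (Indirect)
    Ops.push_back(llvm::dwarf::DW_OP_deref);
  return llvm::DIExpression::prependOpcodes(Expr, Ops);
}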
+ SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg( + /* Reg */ Reg, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true)}); + SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0}); + auto *NewExpr = DIExpression::prependOpcodes(DI->getExpression(), Ops); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD.getDL(), + TII.get(TargetOpcode::DBG_INSTR_REF), /*IsIndirect*/ false, MOs, + DI->getVariable(), NewExpr); } } else { // We don't know how to handle other cases, so we drop. @@ -1340,7 +1341,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { return true; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel()); return true; } @@ -1448,7 +1449,7 @@ bool FastISel::selectFreeze(const User *I) { MVT Ty = ETy.getSimpleVT(); const TargetRegisterClass *TyRegClass = TLI.getRegClassFor(Ty); Register ResultReg = createResultReg(TyRegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), ResultReg).addReg(Reg); updateValueMap(I, ResultReg); @@ -1500,7 +1501,7 @@ bool FastISel::selectInstruction(const Instruction *I) { if (Call->getOperandBundleAt(i).getTagID() != LLVMContext::OB_funclet) return false; - DbgLoc = I->getDebugLoc(); + MIMD = MIMetadata(*I); SavedInsertPt = FuncInfo.InsertPt; @@ -1525,7 +1526,7 @@ bool FastISel::selectInstruction(const Instruction *I) { if (!SkipTargetIndependentISel) { if (selectOperator(I, I->getOpcode())) { ++NumFastIselSuccessIndependent; - DbgLoc = DebugLoc(); + MIMD = {}; return true; } // Remove dead code. @@ -1537,7 +1538,7 @@ bool FastISel::selectInstruction(const Instruction *I) { // Next, try calling the target to attempt to handle the instruction. if (fastSelectInstruction(I)) { ++NumFastIselSuccessTarget; - DbgLoc = DebugLoc(); + MIMD = {}; return true; } // Remove dead code. @@ -1545,7 +1546,7 @@ bool FastISel::selectInstruction(const Instruction *I) { if (SavedInsertPt != FuncInfo.InsertPt) removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); - DbgLoc = DebugLoc(); + MIMD = {}; // Undo phi node updates, because they will be added again by SelectionDAG. if (I->isTerminator()) { // PHI node handling may have generated local value instructions. @@ -1593,7 +1594,7 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB, FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); } - fastEmitBranch(FalseMBB, DbgLoc); + fastEmitBranch(FalseMBB, MIMD.getDL()); } /// Emit an FNeg operation. @@ -1906,7 +1907,7 @@ Register FastISel::constrainOperandRegClass(const MCInstrDesc &II, Register Op, // If it's not legal to COPY between the register classes, something // has gone very wrong before we got here. 
Register NewOp = createResultReg(RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), NewOp).addReg(Op); return NewOp; } @@ -1919,7 +1920,7 @@ Register FastISel::fastEmitInst_(unsigned MachineInstOpcode, Register ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg); return ResultReg; } @@ -1931,13 +1932,14 @@ Register FastISel::fastEmitInst_r(unsigned MachineInstOpcode, Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; @@ -1953,15 +1955,16 @@ Register FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addReg(Op1); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addReg(Op1); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -1977,17 +1980,18 @@ Register FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addReg(Op1) .addReg(Op2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addReg(Op1) .addReg(Op2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2001,15 +2005,16 @@ Register FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2023,17 +2028,18 @@ 
Register FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addImm(Imm1) .addImm(Imm2); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addImm(Imm1) .addImm(Imm2); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2046,13 +2052,14 @@ Register FastISel::fastEmitInst_f(unsigned MachineInstOpcode, Register ResultReg = createResultReg(RC); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addFPImm(FPImm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addFPImm(FPImm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2067,17 +2074,18 @@ Register FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addReg(Op0) .addReg(Op1) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) .addReg(Op0) .addReg(Op1) .addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2088,12 +2096,13 @@ Register FastISel::fastEmitInst_i(unsigned MachineInstOpcode, const MCInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) .addImm(Imm); else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), + ResultReg) + .addReg(II.implicit_defs()[0]); } return ResultReg; } @@ -2105,7 +2114,7 @@ Register FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, "Cannot yet extract from physregs"); const TargetRegisterClass *RC = MRI.getRegClass(Op0); MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0, 0, Idx); return ResultReg; } @@ -2170,9 +2179,9 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Set the DebugLoc for the copy. 
Use the location of the operand if // there is one; otherwise no location, flushLocalValueMap will fix it. - DbgLoc = DebugLoc(); + MIMD = {}; if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) - DbgLoc = Inst->getDebugLoc(); + MIMD = MIMetadata(*Inst); Register Reg = getRegForValue(PHIOp); if (!Reg) { @@ -2180,7 +2189,7 @@ bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { return false; } FuncInfo.PHINodesToUpdate.push_back(std::make_pair(&*MBBI++, Reg)); - DbgLoc = DebugLoc(); + MIMD = {}; } } diff --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index aa9c77f9cabf..c18cd39ed296 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -119,10 +119,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, } } } - if (Personality == EHPersonality::Wasm_CXX) { - WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); - calculateWasmEHInfo(&fn, EHInfo); - } // Initialize the mapping of values to registers. This is only set up for // instruction values that are used outside of the block that defines @@ -154,7 +150,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, (TFI->isStackRealignable() || (Alignment <= StackAlign))) { const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); uint64_t TySize = - MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize(); + MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinValue(); TySize *= CUI->getZExtValue(); // Get total allocated size. if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. @@ -270,7 +266,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only // the first one should be marked. if (BB.hasAddressTaken()) - MBB->setHasAddressTaken(); + MBB->setAddressTakenIRBlock(const_cast<BasicBlock *>(&BB)); // Mark landing pad blocks. if (BB.isEHPad()) @@ -323,10 +319,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, const auto *BB = CME.Handler.get<const BasicBlock *>(); CME.Handler = MBBMap[BB]; } - } - - else if (Personality == EHPersonality::Wasm_CXX) { + } else if (Personality == EHPersonality::Wasm_CXX) { WasmEHFuncInfo &EHInfo = *MF->getWasmEHFuncInfo(); + calculateWasmEHInfo(&fn, EHInfo); + // Map all BB references in the Wasm EH data to MBBs. DenseMap<BBOrMBB, BBOrMBB> SrcToUnwindDest; for (auto &KV : EHInfo.SrcToUnwindDest) { @@ -369,8 +365,7 @@ void FunctionLoweringInfo::clear() { /// CreateReg - Allocate a single virtual register for the given type. Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { - return RegInfo->createVirtualRegister( - MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent)); + return RegInfo->createVirtualRegister(TLI->getRegClassFor(VT, isDivergent)); } /// CreateRegs - Allocate the appropriate number of virtual registers of @@ -381,8 +376,6 @@ Register FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) { /// will assign registers for each member or element. 
/// Register FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) { - const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); - SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); @@ -451,8 +444,8 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { Register DestReg = It->second; if (DestReg == 0) - return - assert(Register::isVirtualRegister(DestReg) && "Expected a virtual reg"); + return; + assert(DestReg.isVirtual() && "Expected a virtual reg"); LiveOutRegInfo.grow(DestReg); LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; @@ -475,7 +468,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" "CopyToReg node was created."); Register SrcReg = ValueMap[V]; - if (!Register::isVirtualRegister(SrcReg)) { + if (!SrcReg.isVirtual()) { DestLOI.IsValid = false; return; } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 3d3b504c6abd..338172e4e10a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -70,7 +70,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) continue; if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) - if (Register::isPhysicalRegister(RN->getReg())) + if (RN->getReg().isPhysical()) continue; NumImpUses = N - I; break; @@ -81,9 +81,9 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses, /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. -void InstrEmitter:: -EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, - Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap) { +void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, + Register SrcReg, + DenseMap<SDValue, Register> &VRBaseMap) { Register VRBase; if (SrcReg.isVirtual()) { // Just use the input register directly! 
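// Illustrative sketch, not part of this patch: the Register API migration
// applied throughout these files. The static Register::isVirtualRegister() /
// isPhysicalRegister() checks are replaced with the equivalent instance
// methods on llvm::Register.
#include "llvm/CodeGen/Register.h"

static const char *classifyReg(llvm::Register R) {
  if (R.isVirtual())
    return "virtual";  // numbered vregs owned by MachineRegisterInfo
  if (R.isPhysical())
    return "physical"; // a target-defined register
  return "none";       // the null Register / $noreg
}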
@@ -106,51 +106,50 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, if (TLI->isTypeLegal(VT)) UseRC = TLI->getRegClassFor(VT, Node->isDivergent()); - if (!IsClone && !IsCloned) - for (SDNode *User : Node->uses()) { - bool Match = true; - if (User->getOpcode() == ISD::CopyToReg && - User->getOperand(2).getNode() == Node && - User->getOperand(2).getResNo() == ResNo) { - Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (DestReg.isVirtual()) { - VRBase = DestReg; - Match = false; - } else if (DestReg != SrcReg) - Match = false; - } else { - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - SDValue Op = User->getOperand(i); - if (Op.getNode() != Node || Op.getResNo() != ResNo) - continue; - MVT VT = Node->getSimpleValueType(Op.getResNo()); - if (VT == MVT::Other || VT == MVT::Glue) - continue; - Match = false; - if (User->isMachineOpcode()) { - const MCInstrDesc &II = TII->get(User->getMachineOpcode()); - const TargetRegisterClass *RC = nullptr; - if (i+II.getNumDefs() < II.getNumOperands()) { - RC = TRI->getAllocatableClass( - TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); - } - if (!UseRC) - UseRC = RC; - else if (RC) { - const TargetRegisterClass *ComRC = + for (SDNode *User : Node->uses()) { + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (DestReg.isVirtual()) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + MVT VT = Node->getSimpleValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Glue) + continue; + Match = false; + if (User->isMachineOpcode()) { + const MCInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = nullptr; + if (i + II.getNumDefs() < II.getNumOperands()) { + RC = TRI->getAllocatableClass( + TII->getRegClass(II, i + II.getNumDefs(), TRI, *MF)); + } + if (!UseRC) + UseRC = RC; + else if (RC) { + const TargetRegisterClass *ComRC = TRI->getCommonSubClass(UseRC, RC); - // If multiple uses expect disjoint register classes, we emit - // copies in AddRegisterOperand. - if (ComRC) - UseRC = ComRC; - } + // If multiple uses expect disjoint register classes, we emit + // copies in AddRegisterOperand. + if (ComRC) + UseRC = ComRC; } } } - MatchReg &= Match; - if (VRBase) - break; } + MatchReg &= Match; + if (VRBase) + break; + } const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr; SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); @@ -219,7 +218,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, RC = VTRC; } - if (II.OpInfo != nullptr && II.OpInfo[i].isOptionalDef()) { + if (!II.operands().empty() && II.operands()[i].isOptionalDef()) { // Optional def must be a physical register. 
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); assert(VRBase.isPhysical()); @@ -231,8 +230,8 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2).getNode() == Node && User->getOperand(2).getResNo() == i) { - unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) { + Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (Reg.isVirtual()) { const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; @@ -305,7 +304,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && - MCID.OpInfo[IIOpNum].isOptionalDef(); + MCID.operands()[IIOpNum].isOptionalDef(); // If the instruction requires a register in a different class, create // a new virtual register and copy the value into it, but first attempt to @@ -395,7 +394,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, (IIRC && TRI->isDivergentRegClass(IIRC))) : nullptr; - if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) { + if (OpRC && IIRC && OpRC != IIRC && VReg.isVirtual()) { Register NewVReg = MRI->createVirtualRegister(IIRC); BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); @@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, Register Reg; MachineInstr *DefMI; RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0)); - if (R && Register::isPhysicalRegister(R->getReg())) { + if (R && R->getReg().isPhysical()) { Reg = R->getReg(); DefMI = nullptr; } else { @@ -650,7 +649,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); // Skip physical registers as they don't have a vreg to get and we'll // insert copies for them in TwoAddressInstructionPass anyway. - if (!R || !Register::isPhysicalRegister(R->getReg())) { + if (!R || !R->getReg().isPhysical()) { unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); @@ -678,43 +677,54 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, MachineInstr * InstrEmitter::EmitDbgValue(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap) { - MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); - assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + assert(cast<DILocalVariable>(SD->getVariable()) + ->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); SD->setIsEmitted(); - ArrayRef<SDDbgOperand> LocationOps = SD->getLocationOps(); - assert(!LocationOps.empty() && "dbg_value with no location operands?"); + assert(!SD->getLocationOps().empty() && + "dbg_value with no location operands?"); if (SD->isInvalidated()) return EmitDbgNoLocation(SD); - // Emit variadic dbg_value nodes as DBG_VALUE_LIST. - if (SD->isVariadic()) { - // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)* - const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST); - // Build the DBG_VALUE_LIST instruction base. 
- auto MIB = BuildMI(*MF, DL, DbgValDesc); - MIB.addMetadata(Var); - MIB.addMetadata(Expr); - AddDbgValueLocationOps(MIB, DbgValDesc, LocationOps, VRBaseMap); - return &*MIB; - } - // Attempt to produce a DBG_INSTR_REF if we've been asked to. - // We currently exclude the possibility of instruction references for - // variadic nodes; if at some point we enable them, this should be moved - // above the variadic block. if (EmitDebugInstrRefs) if (auto *InstrRef = EmitDbgInstrRef(SD, VRBaseMap)) return InstrRef; + // Emit variadic dbg_value nodes as DBG_VALUE_LIST if they have not been + // emitted as instruction references. + if (SD->isVariadic()) + return EmitDbgValueList(SD, VRBaseMap); + + // Emit single-location dbg_value nodes as DBG_VALUE if they have not been + // emitted as instruction references. return EmitDbgValueFromSingleOp(SD, VRBaseMap); } +MachineOperand GetMOForConstDbgOp(const SDDbgOperand &Op) { + const Value *V = Op.getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) + return MachineOperand::CreateCImm(CI); + return MachineOperand::CreateImm(CI->getSExtValue()); + } + if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) + return MachineOperand::CreateFPImm(CF); + // Note: This assumes that all nullptr constants are zero-valued. + if (isa<ConstantPointerNull>(V)) + return MachineOperand::CreateImm(0); + // Undef or unhandled value type, so return an undef operand. + return MachineOperand::CreateReg( + /* Reg */ 0U, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true); +} + void InstrEmitter::AddDbgValueLocationOps( MachineInstrBuilder &MIB, const MCInstrDesc &DbgValDesc, ArrayRef<SDDbgOperand> LocationOps, @@ -740,24 +750,9 @@ void InstrEmitter::AddDbgValueLocationOps( AddOperand(MIB, V, (*MIB).getNumOperands(), &DbgValDesc, VRBaseMap, /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); } break; - case SDDbgOperand::CONST: { - const Value *V = Op.getConst(); - if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { - if (CI->getBitWidth() > 64) - MIB.addCImm(CI); - else - MIB.addImm(CI->getSExtValue()); - } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { - MIB.addFPImm(CF); - } else if (isa<ConstantPointerNull>(V)) { - // Note: This assumes that all nullptr constants are zero-valued. - MIB.addImm(0); - } else { - // Could be an Undef. In any case insert an Undef so we can see what we - // dropped. - MIB.addReg(0U); - } - } break; + case SDDbgOperand::CONST: + MIB.add(GetMOForConstDbgOp(Op)); + break; } } } @@ -765,116 +760,158 @@ void InstrEmitter::AddDbgValueLocationOps( MachineInstr * InstrEmitter::EmitDbgInstrRef(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap) { - assert(!SD->isVariadic()); - SDDbgOperand DbgOperand = SD->getLocationOps()[0]; MDNode *Var = SD->getVariable(); - DIExpression *Expr = (DIExpression*)SD->getExpression(); + const DIExpression *Expr = (DIExpression *)SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); const MCInstrDesc &RefII = TII->get(TargetOpcode::DBG_INSTR_REF); - // Handle variable locations that don't actually depend on the instructions - // in the program: constants and stack locations. - if (DbgOperand.getKind() == SDDbgOperand::FRAMEIX || - DbgOperand.getKind() == SDDbgOperand::CONST) + // Returns true if the given operand is not a legal debug operand for a + // DBG_INSTR_REF. 
+ auto IsInvalidOp = [](SDDbgOperand DbgOp) { + return DbgOp.getKind() == SDDbgOperand::FRAMEIX; + }; + // Returns true if the given operand is not itself an instruction reference + // but is a legal debug operand for a DBG_INSTR_REF. + auto IsNonInstrRefOp = [](SDDbgOperand DbgOp) { + return DbgOp.getKind() == SDDbgOperand::CONST; + }; + + // If this variable location does not depend on any instructions or contains + // any stack locations, produce it as a standard debug value instead. + if (any_of(SD->getLocationOps(), IsInvalidOp) || + all_of(SD->getLocationOps(), IsNonInstrRefOp)) { + if (SD->isVariadic()) + return EmitDbgValueList(SD, VRBaseMap); return EmitDbgValueFromSingleOp(SD, VRBaseMap); + } // Immediately fold any indirectness from the LLVM-IR intrinsic into the // expression: - if (SD->isIndirect()) { - std::vector<uint64_t> Elts = {dwarf::DW_OP_deref}; - Expr = DIExpression::append(Expr, Elts); - } + if (SD->isIndirect()) + Expr = DIExpression::append(Expr, dwarf::DW_OP_deref); + // If this is not already a variadic expression, it must be modified to become + // one. + if (!SD->isVariadic()) + Expr = DIExpression::convertToVariadicExpression(Expr); + + SmallVector<MachineOperand> MOs; // It may not be immediately possible to identify the MachineInstr that // defines a VReg, it can depend for example on the order blocks are // emitted in. When this happens, or when further analysis is needed later, // produce an instruction like this: // - // DBG_INSTR_REF %0:gr64, 0, !123, !456 + // DBG_INSTR_REF !123, !456, %0:gr64 // // i.e., point the instruction at the vreg, and patch it up later in // MachineFunction::finalizeDebugInstrRefs. - auto EmitHalfDoneInstrRef = [&](unsigned VReg) -> MachineInstr * { - auto MIB = BuildMI(*MF, DL, RefII); - MIB.addReg(VReg); - MIB.addImm(0); - MIB.addMetadata(Var); - MIB.addMetadata(Expr); - return MIB; + auto AddVRegOp = [&](unsigned VReg) { + MOs.push_back(MachineOperand::CreateReg( + /* Reg */ VReg, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true)); }; + unsigned OpCount = SD->getLocationOps().size(); + for (unsigned OpIdx = 0; OpIdx < OpCount; ++OpIdx) { + SDDbgOperand DbgOperand = SD->getLocationOps()[OpIdx]; + + // Try to find both the defined register and the instruction defining it. + MachineInstr *DefMI = nullptr; + unsigned VReg; - // Try to find both the defined register and the instruction defining it. - MachineInstr *DefMI = nullptr; - unsigned VReg; + if (DbgOperand.getKind() == SDDbgOperand::VREG) { + VReg = DbgOperand.getVReg(); - if (DbgOperand.getKind() == SDDbgOperand::VREG) { - VReg = DbgOperand.getVReg(); + // No definition means that block hasn't been emitted yet. Leave a vreg + // reference to be fixed later. + if (!MRI->hasOneDef(VReg)) { + AddVRegOp(VReg); + continue; + } - // No definition means that block hasn't been emitted yet. Leave a vreg - // reference to be fixed later. - if (!MRI->hasOneDef(VReg)) - return EmitHalfDoneInstrRef(VReg); + DefMI = &*MRI->def_instr_begin(VReg); + } else if (DbgOperand.getKind() == SDDbgOperand::SDNODE) { + // Look up the corresponding VReg for the given SDNode, if any. + SDNode *Node = DbgOperand.getSDNode(); + SDValue Op = SDValue(Node, DbgOperand.getResNo()); + DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); + // No VReg -> produce a DBG_VALUE $noreg instead. 
+ if (I == VRBaseMap.end()) + break; - DefMI = &*MRI->def_instr_begin(VReg); - } else { - assert(DbgOperand.getKind() == SDDbgOperand::SDNODE); - // Look up the corresponding VReg for the given SDNode, if any. - SDNode *Node = DbgOperand.getSDNode(); - SDValue Op = SDValue(Node, DbgOperand.getResNo()); - DenseMap<SDValue, Register>::iterator I = VRBaseMap.find(Op); - // No VReg -> produce a DBG_VALUE $noreg instead. - if (I==VRBaseMap.end()) - return EmitDbgNoLocation(SD); - - // Try to pick out a defining instruction at this point. - VReg = getVR(Op, VRBaseMap); - - // Again, if there's no instruction defining the VReg right now, fix it up - // later. - if (!MRI->hasOneDef(VReg)) - return EmitHalfDoneInstrRef(VReg); - - DefMI = &*MRI->def_instr_begin(VReg); - } + // Try to pick out a defining instruction at this point. + VReg = getVR(Op, VRBaseMap); - // Avoid copy like instructions: they don't define values, only move them. - // Leave a virtual-register reference until it can be fixed up later, to find - // the underlying value definition. - if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI)) - return EmitHalfDoneInstrRef(VReg); + // Again, if there's no instruction defining the VReg right now, fix it up + // later. + if (!MRI->hasOneDef(VReg)) { + AddVRegOp(VReg); + continue; + } - auto MIB = BuildMI(*MF, DL, RefII); + DefMI = &*MRI->def_instr_begin(VReg); + } else { + assert(DbgOperand.getKind() == SDDbgOperand::CONST); + MOs.push_back(GetMOForConstDbgOp(DbgOperand)); + continue; + } - // Find the operand number which defines the specified VReg. - unsigned OperandIdx = 0; - for (const auto &MO : DefMI->operands()) { - if (MO.isReg() && MO.isDef() && MO.getReg() == VReg) - break; - ++OperandIdx; + // Avoid copy like instructions: they don't define values, only move them. + // Leave a virtual-register reference until it can be fixed up later, to + // find the underlying value definition. + if (DefMI->isCopyLike() || TII->isCopyInstr(*DefMI)) { + AddVRegOp(VReg); + continue; + } + + // Find the operand number which defines the specified VReg. + unsigned OperandIdx = 0; + for (const auto &MO : DefMI->operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == VReg) + break; + ++OperandIdx; + } + assert(OperandIdx < DefMI->getNumOperands()); + + // Make the DBG_INSTR_REF refer to that instruction, and that operand. + unsigned InstrNum = DefMI->getDebugInstrNum(); + MOs.push_back(MachineOperand::CreateDbgInstrRef(InstrNum, OperandIdx)); } - assert(OperandIdx < DefMI->getNumOperands()); - // Make the DBG_INSTR_REF refer to that instruction, and that operand. - unsigned InstrNum = DefMI->getDebugInstrNum(); - MIB.addImm(InstrNum); - MIB.addImm(OperandIdx); - MIB.addMetadata(Var); - MIB.addMetadata(Expr); - return &*MIB; + // If we haven't created a valid MachineOperand for every DbgOp, abort and + // produce an undef DBG_VALUE. + if (MOs.size() != OpCount) + return EmitDbgNoLocation(SD); + + return BuildMI(*MF, DL, RefII, false, MOs, Var, Expr); } MachineInstr *InstrEmitter::EmitDbgNoLocation(SDDbgValue *SD) { // An invalidated SDNode must generate an undef DBG_VALUE: although the // original value is no longer computed, earlier DBG_VALUEs live ranges // must not leak into later code. 
+ DIVariable *Var = SD->getVariable(); + const DIExpression *Expr = + DIExpression::convertToUndefExpression(SD->getExpression()); + DebugLoc DL = SD->getDebugLoc(); + const MCInstrDesc &Desc = TII->get(TargetOpcode::DBG_VALUE); + return BuildMI(*MF, DL, Desc, false, 0U, Var, Expr); +} + +MachineInstr * +InstrEmitter::EmitDbgValueList(SDDbgValue *SD, + DenseMap<SDValue, Register> &VRBaseMap) { MDNode *Var = SD->getVariable(); - MDNode *Expr = SD->getExpression(); + DIExpression *Expr = SD->getExpression(); DebugLoc DL = SD->getDebugLoc(); - auto MIB = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)); - MIB.addReg(0U); - MIB.addReg(0U); + // DBG_VALUE_LIST := "DBG_VALUE_LIST" var, expression, loc (, loc)* + const MCInstrDesc &DbgValDesc = TII->get(TargetOpcode::DBG_VALUE_LIST); + // Build the DBG_VALUE_LIST instruction base. + auto MIB = BuildMI(*MF, DL, DbgValDesc); MIB.addMetadata(Var); MIB.addMetadata(Expr); + AddDbgValueLocationOps(MIB, DbgValDesc, SD->getLocationOps(), VRBaseMap); return &*MIB; } @@ -984,8 +1021,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); bool HasVRegVariadicDefs = !MF->getTarget().usesPhysRegsForValues() && II.isVariadic() && II.variadicOpsAreDefs(); - bool HasPhysRegOuts = NumResults > NumDefs && - II.getImplicitDefs() != nullptr && !HasVRegVariadicDefs; + bool HasPhysRegOuts = NumResults > NumDefs && !II.implicit_defs().empty() && + !HasVRegVariadicDefs; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -993,8 +1030,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, "Too few operands for a variadic node!"); else assert(NumMIOperands >= II.getNumOperands() && - NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + - NumImpUses && + NumMIOperands <= + II.getNumOperands() + II.implicit_defs().size() + NumImpUses && "#operands for dag node doesn't match .td file!"); #endif @@ -1063,6 +1100,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // part of the function. MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands()); + // Set the CFI type. + MIB->setCFIType(*MF, Node->getCFIType()); + // Insert the instruction into position in the block. This needs to // happen before any custom inserter hook is called so that the // hook knows where in the block to insert the replacement code. @@ -1088,12 +1128,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Additional results must be physical register defs. if (HasPhysRegOuts) { for (unsigned i = NumDefs; i < NumResults; ++i) { - Register Reg = II.getImplicitDefs()[i - NumDefs]; + Register Reg = II.implicit_defs()[i - NumDefs]; if (!Node->hasAnyUseOfValue(i)) continue; // This implicitly defined physreg has a use. UsedRegs.push_back(Reg); - EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap); } } @@ -1109,8 +1149,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, } // Collect declared implicit uses. const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); - UsedRegs.append(MCID.getImplicitUses(), - MCID.getImplicitUses() + MCID.getNumImplicitUses()); + append_range(UsedRegs, MCID.implicit_uses()); // In addition to declared implicit uses, we must also check for // direct RegisterSDNode operands. 
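// Illustrative sketch, not part of this patch: the MCInstrDesc accessors this
// patch switches to. operands(), implicit_defs() and implicit_uses() return
// ArrayRefs, so the old null-terminated ImplicitDefs/ImplicitUses pointers and
// explicit counts give way to range-based loops like the
// append_range(UsedRegs, MCID.implicit_uses()) call above.
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/raw_ostream.h"

static void dumpImplicitOperands(const llvm::MCInstrDesc &II) {
  for (llvm::MCPhysReg Reg : II.implicit_defs())
    llvm::errs() << "implicit-def of physical register " << unsigned(Reg) << '\n';
  for (llvm::MCPhysReg Reg : II.implicit_uses())
    llvm::errs() << "implicit-use of physical register " << unsigned(Reg) << '\n';
  if (!II.operands().empty() && II.operands()[0].isOptionalDef())
    llvm::errs() << "first declared operand is an optional def\n";
}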
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) @@ -1123,7 +1162,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, } // Finally mark unused registers as dead. - if (!UsedRegs.empty() || II.getImplicitDefs() || II.hasOptionalDef()) + if (!UsedRegs.empty() || !II.implicit_defs().empty() || II.hasOptionalDef()) MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // STATEPOINT is too 'dynamic' to have meaningful machine description. @@ -1159,14 +1198,13 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, #endif llvm_unreachable("This target-independent node should have been selected!"); case ISD::EntryToken: - llvm_unreachable("EntryToken should have been excluded from the schedule!"); case ISD::MERGE_VALUES: case ISD::TokenFactor: // fall thru break; case ISD::CopyToReg: { Register DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); SDValue SrcVal = Node->getOperand(2); - if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() && + if (DestReg.isVirtual() && SrcVal.isMachineOpcode() && SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { // Instead building a COPY to that vreg destination, build an // IMPLICIT_DEF instruction instead. @@ -1189,7 +1227,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } case ISD::CopyFromReg: { unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); - EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap); break; } case ISD::EH_LABEL: @@ -1273,28 +1311,25 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, default: llvm_unreachable("Bad flags!"); case InlineAsm::Kind_RegDef: for (unsigned j = 0; j != NumVals; ++j, ++i) { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. - MIB.addReg(Reg, - RegState::Define | - getImplRegState(Register::isPhysicalRegister(Reg))); + MIB.addReg(Reg, RegState::Define | getImplRegState(Reg.isPhysical())); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - MIB.addReg(Reg, - RegState::Define | RegState::EarlyClobber | - getImplRegState(Register::isPhysicalRegister(Reg))); + Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | + getImplRegState(Reg.isPhysical())); ECRegs.push_back(Reg); } break; case InlineAsm::Kind_RegUse: // Use of register. case InlineAsm::Kind_Imm: // Immediate. - case InlineAsm::Kind_Mem: // Addressing mode. + case InlineAsm::Kind_Mem: // Non-function addressing mode. // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) @@ -1312,6 +1347,21 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, } } break; + case InlineAsm::Kind_Func: // Function addressing mode. + for (unsigned j = 0; j != NumVals; ++j, ++i) { + SDValue Op = Node->getOperand(i); + AddOperand(MIB, Op, 0, nullptr, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); + + // Adjust Target Flags for function reference. 
+ if (auto *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + unsigned NewFlags = + MF->getSubtarget().classifyGlobalFunctionReference( + TGA->getGlobal()); + unsigned LastIdx = MIB.getInstr()->getNumOperands() - 1; + MIB.getInstr()->getOperand(LastIdx).setTargetFlags(NewFlags); + } + } } } @@ -1344,12 +1394,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, - MachineBasicBlock::iterator insertpos, - bool UseInstrRefDebugInfo) + MachineBasicBlock::iterator insertpos) : MF(mbb->getParent()), MRI(&MF->getRegInfo()), TII(MF->getSubtarget().getInstrInfo()), TRI(MF->getSubtarget().getRegisterInfo()), TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), InsertPos(insertpos) { - EmitDebugInstrRefs = UseInstrRefDebugInfo; + EmitDebugInstrRefs = mbb->getParent()->useDebugInstrRef(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index ced8f064b9be..959bce31c8b2 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -44,10 +44,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an /// implicit physical register output. - void EmitCopyFromReg(SDNode *Node, unsigned ResNo, - bool IsClone, bool IsCloned, - Register SrcReg, - DenseMap<SDValue, Register> &VRBaseMap); + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, + Register SrcReg, DenseMap<SDValue, Register> &VRBaseMap); void CreateVirtualRegisters(SDNode *Node, MachineInstrBuilder &MIB, @@ -128,6 +126,10 @@ public: /// Emit a DBG_VALUE $noreg, indicating a variable has no location. MachineInstr *EmitDbgNoLocation(SDDbgValue *SD); + /// Emit a DBG_VALUE_LIST from the operands to SDDbgValue. + MachineInstr *EmitDbgValueList(SDDbgValue *SD, + DenseMap<SDValue, Register> &VRBaseMap); + /// Emit a DBG_VALUE from the operands to SDDbgValue. MachineInstr *EmitDbgValueFromSingleOp(SDDbgValue *SD, DenseMap<SDValue, Register> &VRBaseMap); @@ -154,8 +156,7 @@ public: /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. 
InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, - MachineBasicBlock::iterator insertpos, - bool UseInstrRefDebugInfo); + MachineBasicBlock::iterator insertpos); private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 56d35dfe8701..c3106216a060 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -142,10 +142,12 @@ private: RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results); - SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, - RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128, - RTLIB::Libcall Call_IEXT); + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128); void ExpandArgFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, @@ -308,7 +310,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { // We don't want to shrink SNaNs. Converting the SNaN back to its real type // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ). if (!APF.isSignaling()) { - while (SVT != MVT::f32 && SVT != MVT::f16) { + while (SVT != MVT::f32 && SVT != MVT::f16 && SVT != MVT::bf16) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); if (ConstantFPSDNode::isValueValidForType(SVT, APF) && // Only do this if the target has a native EXTLOAD instruction from @@ -550,16 +552,16 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { // Promote to a byte-sized store with upper bits zero if not // storing an integral number of bytes. For example, promote // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) - EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedSize()); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StSize.getFixedValue()); Value = DAG.getZeroExtendInReg(Value, dl, StVT); SDValue Result = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), NVT, ST->getOriginalAlign(), MMOFlags, AAInfo); ReplaceNode(SDValue(Node, 0), Result); - } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedSize())) { + } else if (!StVT.isVector() && !isPowerOf2_64(StWidth.getFixedValue())) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); - unsigned StWidthBits = StWidth.getFixedSize(); + unsigned StWidthBits = StWidth.getFixedValue(); unsigned LogStWidth = Log2_32(StWidthBits); assert(LogStWidth < 32); unsigned RoundWidth = 1 << LogStWidth; @@ -767,10 +769,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Value = Result; Chain = Ch; - } else if (!isPowerOf2_64(SrcWidth.getKnownMinSize())) { + } else if (!isPowerOf2_64(SrcWidth.getKnownMinValue())) { // If not loading a power-of-2 number of bits, expand as two loads. 
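// Worked example, not part of this patch, of the non-power-of-2 width split
// used by both the truncating-store path above and the extending-load path
// below: a 24-bit access is emitted as a 16-bit part plus an 8-bit part,
// using the same arithmetic as the surrounding code.
#include "llvm/Support/MathExtras.h"

constexpr unsigned WidthBits = 24;
static_assert(!llvm::isPowerOf2_64(WidthBits), "24 bits takes the split path");
constexpr unsigned RoundWidth = 1u << 4; // Log2_32(24) == 4, so 16 bits
constexpr unsigned ExtraWidth = WidthBits - RoundWidth; // the remaining 8 bits
static_assert(RoundWidth == 16 && ExtraWidth == 8, "24 = 16 + 8");
// In the little-endian case the 16-bit half is accessed at offset 0 and the
// 8-bit half at byte offset RoundWidth / 8 == 2.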
assert(!SrcVT.isVector() && "Unsupported extload!"); - unsigned SrcWidthBits = SrcWidth.getFixedSize(); + unsigned SrcWidthBits = SrcWidth.getFixedValue(); unsigned LogSrcWidth = Log2_32(SrcWidthBits); assert(LogSrcWidth < 32); unsigned RoundWidth = 1 << LogSrcWidth; @@ -850,7 +852,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLowering::Legal: Value = SDValue(Node, 0); Chain = SDValue(Node, 1); @@ -1035,12 +1037,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::STRICT_FSETCC: case ISD::STRICT_FSETCCS: case ISD::SETCC: + case ISD::SETCCCARRY: case ISD::VP_SETCC: case ISD::BR_CC: { unsigned Opc = Node->getOpcode(); unsigned CCOperand = Opc == ISD::SELECT_CC ? 4 : Opc == ISD::STRICT_FSETCC ? 3 : Opc == ISD::STRICT_FSETCCS ? 3 + : Opc == ISD::SETCCCARRY ? 3 : (Opc == ISD::SETCC || Opc == ISD::VP_SETCC) ? 2 : 1; unsigned CompareOperand = Opc == ISD::BR_CC ? 2 @@ -1074,7 +1078,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SimpleFinishLegalizing = false; break; case ISD::EXTRACT_ELEMENT: - case ISD::FLT_ROUNDS_: + case ISD::GET_ROUNDING: case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: @@ -1317,11 +1321,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { return; } LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLowering::Expand: if (ExpandNode(Node)) return; - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLowering::LibCall: ConvertNodeToLibcall(Node); return; @@ -1717,8 +1721,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, DAG.getConstant(-Alignment.value(), dl, VT)); Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), - DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl); Results.push_back(Tmp1); Results.push_back(Tmp2); @@ -2111,17 +2114,15 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, ExpandFPLibCall(Node, LC, Results); } -SDValue SelectionDAGLegalize::ExpandIntLibCall( - SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, - RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, - RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) { +SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128) { RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { - - default: - LC = Call_IEXT; - break; - + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC = Call_I8; break; case MVT::i16: LC = Call_I16; break; case MVT::i32: LC = Call_I32; break; @@ -2156,11 +2157,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, RTLIB::Libcall LC; switch (Node->getSimpleValueType(0).SimpleTy) { - - default: - LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT; - break; - + default: llvm_unreachable("Unexpected request for libcall!"); case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; case MVT::i32: LC= isSigned ? 
RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; @@ -2744,7 +2741,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { FA, Offset)); break; } - case ISD::FLT_ROUNDS_: + case ISD::GET_ROUNDING: Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); Results.push_back(Node->getOperand(0)); break; @@ -2911,13 +2908,44 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { break; case ISD::BF16_TO_FP: { // Always expand bf16 to f32 casts, they lower to ext + shift. - SDValue Op = DAG.getNode(ISD::BITCAST, dl, MVT::i16, Node->getOperand(0)); - Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op); + // + // Note that the operand of this code can be bf16 or an integer type in case + // bf16 is not supported on the target and was softened. + SDValue Op = Node->getOperand(0); + if (Op.getValueType() == MVT::bf16) { + Op = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i16, Op)); + } else { + Op = DAG.getAnyExtOrTrunc(Op, dl, MVT::i32); + } Op = DAG.getNode( ISD::SHL, dl, MVT::i32, Op, DAG.getConstant(16, dl, TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op); + // Add fp_extend in case the output is bigger than f32. + if (Node->getValueType(0) != MVT::f32) + Op = DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Op); + Results.push_back(Op); + break; + } + case ISD::FP_TO_BF16: { + SDValue Op = Node->getOperand(0); + if (Op.getValueType() != MVT::f32) + Op = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); + Op = DAG.getNode( + ISD::SRL, dl, MVT::i32, DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op), + DAG.getConstant(16, dl, + TLI.getShiftAmountTy(MVT::i32, DAG.getDataLayout()))); + // The result of this node can be bf16 or an integer type in case bf16 is + // not supported on the target and was softened to i16 for storage. + if (Node->getValueType(0) == MVT::bf16) { + Op = DAG.getNode(ISD::BITCAST, dl, MVT::bf16, + DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Op)); + } else { + Op = DAG.getAnyExtOrTrunc(Op, dl, Node->getValueType(0)); + } Results.push_back(Op); break; } @@ -2961,7 +2989,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp2); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SINT_TO_FP: case ISD::STRICT_SINT_TO_FP: if ((Tmp1 = ExpandLegalINT_TO_FP(Node, Tmp2))) { @@ -3112,7 +3140,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::EXTRACT_ELEMENT: { EVT OpTy = Node->getOperand(0).getValueType(); - if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + if (Node->getConstantOperandVal(1)) { // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), DAG.getConstant(OpTy.getSizeInBits() / 2, dl, @@ -3251,8 +3279,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { // Under fastmath, we can expand this node into a fround followed by // a float-half conversion. 
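The BF16_TO_FP / FP_TO_BF16 expansions added above reduce to a 16-bit shift of the raw bit pattern, because bf16 is exactly the top half of an IEEE f32; note that the SRL-based FP_TO_BF16 path simply drops the low 16 mantissa bits, i.e. it does not round. A minimal standalone model of that bit manipulation (not the DAG code):

#include <cstdint>
#include <cstring>

float bf16BitsToFloat(uint16_t Bits) {
  uint32_t Widened = static_cast<uint32_t>(Bits) << 16;  // SHL by 16
  float F;
  std::memcpy(&F, &Widened, sizeof(F));                   // bitcast to f32
  return F;
}

uint16_t floatToBF16Bits(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));                   // bitcast to i32
  return static_cast<uint16_t>(Bits >> 16);               // SRL by 16, truncating
}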
- SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, - DAG.getIntPtrConstant(0, dl)); + SDValue FloatVal = + DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); Results.push_back( DAG.getNode(ISD::FP_TO_FP16, dl, Node->getValueType(0), FloatVal)); } @@ -4379,24 +4408,28 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { RTLIB::SUB_PPCF128, Results); break; case ISD::SREM: - Results.push_back(ExpandIntLibCall( - Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32, - RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT)); + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128)); break; case ISD::UREM: - Results.push_back(ExpandIntLibCall( - Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32, - RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT)); + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128)); break; case ISD::SDIV: - Results.push_back(ExpandIntLibCall( - Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32, - RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT)); + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128)); break; case ISD::UDIV: - Results.push_back(ExpandIntLibCall( - Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32, - RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT)); + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128)); break; case ISD::SDIVREM: case ISD::UDIVREM: @@ -4404,9 +4437,10 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { ExpandDivRemLibCall(Node, Results); break; case ISD::MUL: - Results.push_back(ExpandIntLibCall( - Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32, - RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT)); + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128)); break; case ISD::CTLZ_ZERO_UNDEF: switch (Node->getSimpleValueType(0).SimpleTy) { @@ -4696,7 +4730,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1); else Tmp1 = DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp1, - DAG.getIntPtrConstant(0, dl)); + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); Results.push_back(Tmp1); break; @@ -4756,8 +4790,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Node->getFlags()); - Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(0, dl))); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; case ISD::STRICT_FADD: case ISD::STRICT_FSUB: @@ -4787,7 +4822,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Results.push_back( DAG.getNode(ISD::FP_ROUND, dl, OVT, DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), - DAG.getIntPtrConstant(0, dl))); + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; case ISD::STRICT_FMA: Tmp1 = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NVT, MVT::Other}, @@ -4817,8 +4852,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { // (fp_round (fpext a)) // 
which is a no-op. Mark it as a TRUNCating FP_ROUND. const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); - Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp3, + DAG.getIntPtrConstant(isTrunc, dl, /*isTarget=*/true))); break; } case ISD::STRICT_FPOWI: @@ -4850,8 +4886,9 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { case ISD::FEXP2: Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); - Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, - Tmp2, DAG.getIntPtrConstant(0, dl))); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; case ISD::STRICT_FFLOOR: case ISD::STRICT_FCEIL: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b2df67f45c72..f1e80ce7e037 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1071,8 +1071,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { if (ST->isTruncatingStore()) // Do an FP_ROUND followed by a non-truncating store. - Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), - Val, DAG.getIntPtrConstant(0, dl))); + Val = BitConvertToInteger( + DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), Val, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); else Val = GetSoftenedFloat(Val); @@ -2532,7 +2533,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { // Round the value to the desired precision (that of the source type). return DAG.getNode( ISD::FP_EXTEND, DL, NVT, - DAG.getNode(ISD::FP_ROUND, DL, VT, NV, DAG.getIntPtrConstant(0, DL))); + DAG.getNode(ISD::FP_ROUND, DL, VT, NV, + DAG.getIntPtrConstant(0, DL, /*isTarget=*/true))); } SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { @@ -2746,39 +2748,47 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FMAD(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); SDValue Op2 = GetSoftPromotedHalf(N->getOperand(2)); SDLoc dl(N); // Promote to the larger FP type. - Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); - Op2 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op2); + auto PromotionOpcode = GetPromotionOpcode(OVT, NVT); + Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0); + Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1); + Op2 = DAG.getNode(PromotionOpcode, dl, NVT, Op2); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1, Op2); // Convert back to FP16 as an integer. 
- return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FPOWI(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); SDValue Op1 = N->getOperand(1); SDLoc dl(N); - Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); + // Promote to the larger FP type. + Op0 = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op0); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1); // Convert back to FP16 as an integer. - return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { + EVT RVT = N->getValueType(0); + EVT SVT = N->getOperand(0).getValueType(); + if (N->isStrictFPOpcode()) { + assert(RVT == MVT::f16); SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_FP16, SDLoc(N), {MVT::i16, MVT::Other}, {N->getOperand(0), N->getOperand(1)}); @@ -2786,7 +2796,8 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) { return Res; } - return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), MVT::i16, N->getOperand(0)); + return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), MVT::i16, + N->getOperand(0)); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_LOAD(SDNode *N) { @@ -2821,13 +2832,14 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_SELECT_CC(SDNode *N) { } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_XINT_TO_FP(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDLoc dl(N); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); // Round the value to the softened type. - return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) { @@ -2835,33 +2847,36 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UNDEF(SDNode *N) { } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_UnaryOp(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op = GetSoftPromotedHalf(N->getOperand(0)); SDLoc dl(N); // Promote to the larger FP type. - Op = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + Op = DAG.getNode(GetPromotionOpcode(OVT, NVT), dl, NVT, Op); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op); // Convert back to FP16 as an integer. - return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_BinOp(SDNode *N) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Op0 = GetSoftPromotedHalf(N->getOperand(0)); SDValue Op1 = GetSoftPromotedHalf(N->getOperand(1)); SDLoc dl(N); // Promote to the larger FP type. 
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + auto PromotionOpcode = GetPromotionOpcode(OVT, NVT); + Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0); + Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, Op0, Op1); // Convert back to FP16 as an integer. - return DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, Res); + return DAG.getNode(GetPromotionOpcode(NVT, OVT), dl, MVT::i16, Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfRes_VECREDUCE(SDNode *N) { @@ -2945,22 +2960,27 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "Only Operand 1 must need promotion here"); SDValue Op1 = N->getOperand(1); + EVT RVT = Op1.getValueType(); SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op1.getValueType()); Op1 = GetSoftPromotedHalf(Op1); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + Op1 = DAG.getNode(GetPromotionOpcode(RVT, NVT), dl, NVT, Op1); return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), N->getOperand(0), Op1); } SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { + EVT RVT = N->getValueType(0); bool IsStrict = N->isStrictFPOpcode(); - SDValue Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0)); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + EVT SVT = Op.getValueType(); + Op = GetSoftPromotedHalf(N->getOperand(IsStrict ? 1 : 0)); if (IsStrict) { + assert(SVT == MVT::f16); SDValue Res = DAG.getNode(ISD::STRICT_FP16_TO_FP, SDLoc(N), {N->getValueType(0), MVT::Other}, {N->getOperand(0), Op}); @@ -2969,31 +2989,35 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_EXTEND(SDNode *N) { return SDValue(); } - return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), Op); + return DAG.getNode(GetPromotionOpcode(SVT, RVT), SDLoc(N), RVT, Op); } SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT(SDNode *N) { + EVT RVT = N->getValueType(0); SDValue Op = N->getOperand(0); + EVT SVT = Op.getValueType(); SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); Op = GetSoftPromotedHalf(Op); - SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op); return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res); } SDValue DAGTypeLegalizer::SoftPromoteHalfOp_FP_TO_XINT_SAT(SDNode *N) { + EVT RVT = N->getValueType(0); SDValue Op = N->getOperand(0); + EVT SVT = Op.getValueType(); SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()); Op = GetSoftPromotedHalf(Op); - SDValue Res = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op); + SDValue Res = DAG.getNode(GetPromotionOpcode(SVT, RVT), dl, NVT, Op); return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Res, N->getOperand(1)); @@ -3006,14 +3030,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SELECT_CC(SDNode *N, SDValue Op1 = N->getOperand(1); SDLoc dl(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType()); + EVT SVT = Op0.getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), SVT); Op0 = GetSoftPromotedHalf(Op0); Op1 = GetSoftPromotedHalf(Op1); // Promote to the larger FP type. 
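The repeated pattern in the SoftPromoteHalf changes above is: take the f16/bf16 operands (held as i16), widen them with whatever conversion opcode matches the source format, do the arithmetic in the wider type, and convert back to a 16-bit payload. GetPromotionOpcode encapsulates the opcode choice; the following is only a hypothetical sketch of the decision such a helper has to make, not the actual implementation in LegalizeFloatTypes.cpp:

#include <cassert>
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"

using namespace llvm;

// Hypothetical: pick the conversion opcode for widening From to To (or
// narrowing back) in the soft-promote-half path. The real helper may differ.
static unsigned getPromotionOpcodeSketch(EVT From, EVT To) {
  if (From == MVT::f16)
    return ISD::FP16_TO_FP;  // widen an f16 held as i16
  if (From == MVT::bf16)
    return ISD::BF16_TO_FP;  // widen a bf16 held as i16
  if (To == MVT::f16)
    return ISD::FP_TO_FP16;  // narrow back to an f16 payload
  assert(To == MVT::bf16 && "expected a half or bf16 promotion");
  return ISD::FP_TO_BF16;    // narrow back to a bf16 payload
}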
- Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + auto PromotionOpcode = GetPromotionOpcode(SVT, NVT); + Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0); + Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), Op0, Op1, N->getOperand(2), N->getOperand(3), N->getOperand(4)); @@ -3025,14 +3051,16 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfOp_SETCC(SDNode *N) { ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); SDLoc dl(N); + EVT SVT = Op0.getValueType(); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Op0.getValueType()); Op0 = GetSoftPromotedHalf(Op0); Op1 = GetSoftPromotedHalf(Op1); // Promote to the larger FP type. - Op0 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op0); - Op1 = DAG.getNode(ISD::FP16_TO_FP, dl, NVT, Op1); + auto PromotionOpcode = GetPromotionOpcode(SVT, NVT); + Op0 = DAG.getNode(PromotionOpcode, dl, NVT, Op0); + Op1 = DAG.getNode(PromotionOpcode, dl, NVT, Op1); return DAG.getSetCC(SDLoc(N), N->getValueType(0), Op0, Op1, CCCode); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 228d4a43ccde..c9ce9071a25d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -137,8 +137,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; - case ISD::VP_FPTOSI: - case ISD::VP_FPTOUI: + case ISD::VP_FP_TO_SINT: + case ISD::VP_FP_TO_UINT: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: @@ -148,9 +148,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_UINT_SAT: Res = PromoteIntRes_FP_TO_XINT_SAT(N); break; - case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; + case ISD::FP_TO_BF16: + case ISD::FP_TO_FP16: + Res = PromoteIntRes_FP_TO_FP16_BF16(N); + break; - case ISD::FLT_ROUNDS_: Res = PromoteIntRes_FLT_ROUNDS(N); break; + case ISD::GET_ROUNDING: Res = PromoteIntRes_GET_ROUNDING(N); break; case ISD::AND: case ISD::OR: @@ -165,11 +168,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { case ISD::VP_SUB: case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + case ISD::VP_SMIN: + case ISD::VP_SMAX: case ISD::SDIV: case ISD::SREM: case ISD::VP_SDIV: case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; + case ISD::VP_UMIN: + case ISD::VP_UMAX: case ISD::UDIV: case ISD::UREM: case ISD::VP_UDIV: @@ -673,10 +680,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) NewOpc = ISD::STRICT_FP_TO_SINT; - if (N->getOpcode() == ISD::VP_FPTOUI && - !TLI.isOperationLegal(ISD::VP_FPTOUI, NVT) && - TLI.isOperationLegalOrCustom(ISD::VP_FPTOSI, NVT)) - NewOpc = ISD::VP_FPTOSI; + if (N->getOpcode() == ISD::VP_FP_TO_UINT && + !TLI.isOperationLegal(ISD::VP_FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::VP_FP_TO_SINT, NVT)) + NewOpc = ISD::VP_FP_TO_SINT; SDValue Res; if (N->isStrictFPOpcode()) { @@ -685,7 +692,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // Legalize the chain result - switch anything that used the old chain to // use the new one. 
ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); - } else if (NewOpc == ISD::VP_FPTOSI || NewOpc == ISD::VP_FPTOUI) { + } else if (NewOpc == ISD::VP_FP_TO_SINT || NewOpc == ISD::VP_FP_TO_UINT) { Res = DAG.getNode(NewOpc, dl, NVT, {N->getOperand(0), N->getOperand(1), N->getOperand(2)}); } else { @@ -701,7 +708,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { // after legalization: fp-to-sint32, 65534. -> 0x0000fffe return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT || N->getOpcode() == ISD::STRICT_FP_TO_UINT || - N->getOpcode() == ISD::VP_FPTOUI) + N->getOpcode() == ISD::VP_FP_TO_UINT) ? ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, @@ -716,14 +723,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT_SAT(SDNode *N) { N->getOperand(1)); } -SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16_BF16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); } -SDValue DAGTypeLegalizer::PromoteIntRes_FLT_ROUNDS(SDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_GET_ROUNDING(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); @@ -836,7 +843,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { SDLoc dl(N); SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT), - makeArrayRef(Ops, NumOps)); + ArrayRef(Ops, NumOps)); // Modified the sum result - switch anything that used the old sum to use // the new one. @@ -1555,7 +1562,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_VSCALE(SDNode *N) { EVT VT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - APInt MulImm = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); + const APInt &MulImm = N->getConstantOperandAPInt(0); return DAG.getVScale(SDLoc(N), VT, MulImm.sext(VT.getSizeInBits())); } @@ -1648,7 +1655,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { case ISD::VP_SETCC: case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; - case ISD::VP_SITOFP: + case ISD::VP_SINT_TO_FP: case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; case ISD::STRICT_SINT_TO_FP: Res = PromoteIntOp_STRICT_SINT_TO_FP(N); break; case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), @@ -1663,8 +1670,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { OpNo); break; case ISD::VP_TRUNCATE: case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::BF16_TO_FP: case ISD::FP16_TO_FP: - case ISD::VP_UITOFP: + case ISD::VP_UINT_TO_FP: case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; case ISD::STRICT_UINT_TO_FP: Res = PromoteIntOp_STRICT_UINT_TO_FP(N); break; case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; @@ -1998,7 +2006,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { - if (N->getOpcode() == ISD::VP_SITOFP) + if (N->getOpcode() == ISD::VP_SINT_TO_FP) return SDValue(DAG.UpdateNodeOperands(N, SExtPromotedInteger(N->getOperand(0)), N->getOperand(1), N->getOperand(2)), @@ -2127,7 +2135,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { - if (N->getOpcode() == ISD::VP_UITOFP) + if 
(N->getOpcode() == ISD::VP_UINT_TO_FP) return SDValue(DAG.UpdateNodeOperands(N, ZExtPromotedInteger(N->getOperand(0)), N->getOperand(1), N->getOperand(2)), @@ -2420,17 +2428,21 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; case ISD::CTTZ_ZERO_UNDEF: case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; - case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break; + case ISD::GET_ROUNDING:ExpandIntRes_GET_ROUNDING(N, Lo, Hi); break; case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: ExpandIntRes_FP_TO_XINT_SAT(N, Lo, Hi); break; + case ISD::STRICT_LROUND: + case ISD::STRICT_LRINT: + case ISD::LROUND: + case ISD::LRINT: case ISD::STRICT_LLROUND: case ISD::STRICT_LLRINT: case ISD::LLROUND: - case ISD::LLRINT: ExpandIntRes_LLROUND_LLRINT(N, Lo, Hi); break; + case ISD::LLRINT: ExpandIntRes_XROUND_XRINT(N, Lo, Hi); break; case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; @@ -2866,15 +2878,29 @@ void DAGTypeLegalizer::ExpandIntRes_MINMAX(SDNode *N, ISD::CondCode CondC; std::tie(CondC, LoOpc) = getExpandedMinMaxOps(N->getOpcode()); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; - GetExpandedInteger(N->getOperand(0), LHSL, LHSH); - GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + GetExpandedInteger(LHS, LHSL, LHSH); + GetExpandedInteger(RHS, RHSL, RHSH); // Value types EVT NVT = LHSL.getValueType(); EVT CCT = getSetCCResultType(NVT); + // If the upper halves are all sign bits, then we can perform the MINMAX on + // the lower half and sign-extend the result to the upper half. + unsigned NumHalfBits = NVT.getScalarSizeInBits(); + if (DAG.ComputeNumSignBits(LHS) > NumHalfBits && + DAG.ComputeNumSignBits(RHS) > NumHalfBits) { + Lo = DAG.getNode(N->getOpcode(), DL, NVT, LHSL, RHSL); + Hi = DAG.getNode(ISD::SRA, DL, NVT, Lo, + DAG.getShiftAmountConstant(NumHalfBits - 1, NVT, DL)); + return; + } + // Hi part is always the same op Hi = DAG.getNode(N->getOpcode(), DL, NVT, {LHSH, RHSH}); @@ -2913,13 +2939,13 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); Hi = DAG.computeKnownBits(HiOps[2]).isZero() - ? DAG.getNode(ISD::UADDO, dl, VTList, makeArrayRef(HiOps, 2)) + ? DAG.getNode(ISD::UADDO, dl, VTList, ArrayRef(HiOps, 2)) : DAG.getNode(ISD::ADDCARRY, dl, VTList, HiOps); } else { Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); HiOps[2] = Lo.getValue(1); Hi = DAG.computeKnownBits(HiOps[2]).isZero() - ? DAG.getNode(ISD::USUBO, dl, VTList, makeArrayRef(HiOps, 2)) + ? 
DAG.getNode(ISD::USUBO, dl, VTList, ArrayRef(HiOps, 2)) : DAG.getNode(ISD::SUBCARRY, dl, VTList, HiOps); } return; @@ -2962,18 +2988,18 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { RevOpc = ISD::SUB; Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); - Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2)); } else { RevOpc = ISD::ADD; Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); - Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2)); } SDValue OVF = Lo.getValue(1); switch (BoolType) { case TargetLoweringBase::UndefinedBooleanContent: OVF = DAG.getNode(ISD::AND, dl, OvfVT, DAG.getConstant(1, dl, OvfVT), OVF); - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLoweringBase::ZeroOrOneBooleanContent: OVF = DAG.getZExtOrTrunc(OVF, dl, NVT); Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); @@ -2987,27 +3013,21 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); - Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); - SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], - ISD::SETULT); + Hi = DAG.getNode(ISD::ADD, dl, NVT, ArrayRef(HiOps, 2)); + SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); - if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) { - SDValue Carry = DAG.getZExtOrTrunc(Cmp1, dl, NVT); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); - return; - } + SDValue Carry; + if (BoolType == TargetLoweringBase::ZeroOrOneBooleanContent) + Carry = DAG.getZExtOrTrunc(Cmp, dl, NVT); + else + Carry = DAG.getSelect(dl, NVT, Cmp, DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); - SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, - DAG.getConstant(1, dl, NVT), - DAG.getConstant(0, dl, NVT)); - SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], - ISD::SETULT); - SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, - DAG.getConstant(1, dl, NVT), Carry1); - Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); - Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, ArrayRef(HiOps, 2)); SDValue Cmp = DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); @@ -3280,6 +3300,14 @@ void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) { GetExpandedInteger(N0, Lo, Hi); EVT NVT = Lo.getValueType(); + // If the upper half is all sign bits, then we can perform the ABS on the + // lower half and zero-extend. + if (DAG.ComputeNumSignBits(N0) > NVT.getScalarSizeInBits()) { + Lo = DAG.getNode(ISD::ABS, dl, NVT, Lo); + Hi = DAG.getConstant(0, dl, NVT); + return; + } + // If we have SUBCARRY, use the expanded form of the sra+xor+sub sequence we // use in LegalizeDAG. 
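Both shortcuts above (the one in ExpandIntRes_MINMAX and the one just added to ExpandIntRes_ABS) rely on the same observation: if ComputeNumSignBits shows the whole value already fits in the low half, the operation can be done on the low halves alone, and the high half is either a sign-extension of the result (min/max) or zero (abs). A standalone numeric check of that reasoning, assuming the usual two's-complement split of an i64 into two i32 halves:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  int64_t A = -123456, B = 7890;                 // both already fit in i32
  int32_t ALo = static_cast<int32_t>(A), BLo = static_cast<int32_t>(B);

  // SMIN on the low halves; Hi is recreated as sra(Lo, 31).
  int32_t MinLo = std::min(ALo, BLo);
  uint32_t MinHi = static_cast<uint32_t>(MinLo >> 31);   // arithmetic shift
  int64_t Min = static_cast<int64_t>(
      (static_cast<uint64_t>(MinHi) << 32) | static_cast<uint32_t>(MinLo));
  assert(Min == std::min(A, B));

  // ABS on the low half; Hi is simply zero.
  int64_t Abs = static_cast<uint32_t>(std::abs(ALo));
  assert(Abs == std::llabs(A));
  return 0;
}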
The SUB part of the expansion is based on // ExpandIntRes_ADDSUB which also uses SUBCARRY/USUBO after checking that @@ -3364,15 +3392,15 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, Hi = DAG.getConstant(0, dl, NVT); } -void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo, +void DAGTypeLegalizer::ExpandIntRes_GET_ROUNDING(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NBitWidth = NVT.getSizeInBits(); - Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, {NVT, MVT::Other}, N->getOperand(0)); + Lo = DAG.getNode(ISD::GET_ROUNDING, dl, {NVT, MVT::Other}, N->getOperand(0)); SDValue Chain = Lo.getValue(1); - // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS + // The high part is the sign of Lo, as -1 is a valid value for GET_ROUNDING Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, DAG.getShiftAmountConstant(NBitWidth - 1, NVT, dl)); @@ -3450,17 +3478,57 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SplitInteger(Res, Lo, Hi); } -void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, - SDValue &Hi) { - SDValue Op = N->getOperand(N->isStrictFPOpcode() ? 1 : 0); +void DAGTypeLegalizer::ExpandIntRes_XROUND_XRINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Op = N->getOperand(IsStrict ? 1 : 0); + SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); assert(getTypeAction(Op.getValueType()) != TargetLowering::TypePromoteFloat && "Input type needs to be promoted!"); EVT VT = Op.getValueType(); + if (VT == MVT::f16) { + VT = MVT::f32; + // Extend to f32. + if (IsStrict) { + Op = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, { VT, MVT::Other }, {Chain, Op}); + Chain = Op.getValue(1); + } else { + Op = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op); + } + } + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (N->getOpcode() == ISD::LLROUND || + if (N->getOpcode() == ISD::LROUND || + N->getOpcode() == ISD::STRICT_LROUND) { + if (VT == MVT::f32) + LC = RTLIB::LROUND_F32; + else if (VT == MVT::f64) + LC = RTLIB::LROUND_F64; + else if (VT == MVT::f80) + LC = RTLIB::LROUND_F80; + else if (VT == MVT::f128) + LC = RTLIB::LROUND_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LROUND_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lround input type!"); + } else if (N->getOpcode() == ISD::LRINT || + N->getOpcode() == ISD::STRICT_LRINT) { + if (VT == MVT::f32) + LC = RTLIB::LRINT_F32; + else if (VT == MVT::f64) + LC = RTLIB::LRINT_F64; + else if (VT == MVT::f80) + LC = RTLIB::LRINT_F80; + else if (VT == MVT::f128) + LC = RTLIB::LRINT_F128; + else if (VT == MVT::ppcf128) + LC = RTLIB::LRINT_PPCF128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected lrint input type!"); + } else if (N->getOpcode() == ISD::LLROUND || N->getOpcode() == ISD::STRICT_LLROUND) { if (VT == MVT::f32) LC = RTLIB::LLROUND_F32; @@ -3489,9 +3557,7 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND_LLRINT(SDNode *N, SDValue &Lo, } else llvm_unreachable("Unexpected opcode!"); - SDLoc dl(N); EVT RetVT = N->getValueType(0); - SDValue Chain = N->isStrictFPOpcode() ? 
N->getOperand(0) : SDValue(); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(true); @@ -4046,70 +4112,6 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, ReplaceValueWith(SDValue(Node, 1), Ovf); } -// Emit a call to __udivei4 and friends which require -// the arguments be based on the stack -// and extra argument that contains the number of bits of the operands. -// Returns the result of the call operation. -static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI, - const RTLIB::Libcall &LC, - SelectionDAG &DAG, SDNode *N, - const SDLoc &DL, const EVT &VT) { - - SDValue InChain = DAG.getEntryNode(); - - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - - // The signature of __udivei4 is - // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b, - // unsigned int bits) - EVT ArgVT = N->op_begin()->getValueType(); - assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 && - "Unexpected argument type for lowering"); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - - SDValue Output = DAG.CreateStackTemporary(ArgVT); - Entry.Node = Output; - Entry.Ty = ArgTy->getPointerTo(); - Entry.IsSExt = false; - Entry.IsZExt = false; - Args.push_back(Entry); - - for (const llvm::SDUse &Op : N->ops()) { - SDValue StackPtr = DAG.CreateStackTemporary(ArgVT); - InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo()); - Entry.Node = StackPtr; - Entry.Ty = ArgTy->getPointerTo(); - Entry.IsSExt = false; - Entry.IsZExt = false; - Args.push_back(Entry); - } - - int Bits = N->getOperand(0) - .getValueType() - .getTypeForEVT(*DAG.getContext()) - ->getIntegerBitWidth(); - Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout())); - Entry.Ty = Type::getInt32Ty(*DAG.getContext()); - Entry.IsSExt = false; - Entry.IsZExt = true; - Args.push_back(Entry); - - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy(DAG.getDataLayout())); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(DL) - .setChain(InChain) - .setLibCallee(TLI.getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args)) - .setDiscardResult(); - - SDValue Chain = TLI.LowerCallTo(CLI).second; - - return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo()); -} - void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -4131,14 +4133,6 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, LC = RTLIB::SDIV_I64; else if (VT == MVT::i128) LC = RTLIB::SDIV_I128; - - else { - SDValue Result = - ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT); - SplitInteger(Result, Lo, Hi); - return; - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4146,6 +4140,111 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_ShiftThroughStack(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + SDValue Shiftee = N->getOperand(0); + EVT VT = Shiftee.getValueType(); + SDValue ShAmt = N->getOperand(1); + EVT ShAmtVT = ShAmt.getValueType(); + + // This legalization is optimal when the shift is by a multiple of byte width, + // %x * 8 <-> %x << 3 so 3 low bits should be be known zero. 
+ bool ShiftByByteMultiple = + DAG.computeKnownBits(ShAmt).countMinTrailingZeros() >= 3; + + // If we can't do it as one step, we'll have two uses of shift amount, + // and thus must freeze it. + if (!ShiftByByteMultiple) + ShAmt = DAG.getFreeze(ShAmt); + + unsigned VTBitWidth = VT.getScalarSizeInBits(); + assert(VTBitWidth % 8 == 0 && "Shifting a not byte multiple value?"); + unsigned VTByteWidth = VTBitWidth / 8; + assert(isPowerOf2_32(VTByteWidth) && + "Shiftee type size is not a power of two!"); + unsigned StackSlotByteWidth = 2 * VTByteWidth; + unsigned StackSlotBitWidth = 8 * StackSlotByteWidth; + EVT StackSlotVT = EVT::getIntegerVT(*DAG.getContext(), StackSlotBitWidth); + + // Get a temporary stack slot 2x the width of our VT. + // FIXME: reuse stack slots? + // FIXME: should we be more picky about alignment? + Align StackSlotAlignment(1); + SDValue StackPtr = DAG.CreateStackTemporary( + TypeSize::getFixed(StackSlotByteWidth), StackSlotAlignment); + EVT PtrTy = StackPtr.getValueType(); + SDValue Ch = DAG.getEntryNode(); + + MachinePointerInfo StackPtrInfo = MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), + cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex()); + + // Extend the value, that is being shifted, to the entire stack slot's width. + SDValue Init; + if (N->getOpcode() != ISD::SHL) { + unsigned WideningOpc = + N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + Init = DAG.getNode(WideningOpc, dl, StackSlotVT, Shiftee); + } else { + // For left-shifts, pad the Shiftee's LSB with zeros to twice it's width. + SDValue AllZeros = DAG.getConstant(0, dl, VT); + Init = DAG.getNode(ISD::BUILD_PAIR, dl, StackSlotVT, AllZeros, Shiftee); + } + // And spill it into the stack slot. + Ch = DAG.getStore(Ch, dl, Init, StackPtr, StackPtrInfo, StackSlotAlignment); + + // Now, compute the full-byte offset into stack slot from where we can load. + // We have shift amount, which is in bits, but in multiples of byte. + // So just divide by CHAR_BIT. + SDNodeFlags Flags; + if (ShiftByByteMultiple) + Flags.setExact(true); + SDValue ByteOffset = DAG.getNode(ISD::SRL, dl, ShAmtVT, ShAmt, + DAG.getConstant(3, dl, ShAmtVT), Flags); + // And clamp it, because OOB load is an immediate UB, + // while shift overflow would have *just* been poison. + ByteOffset = DAG.getNode(ISD::AND, dl, ShAmtVT, ByteOffset, + DAG.getConstant(VTByteWidth - 1, dl, ShAmtVT)); + // We have exactly two strategies on indexing into stack slot here: + // 1. upwards starting from the beginning of the slot + // 2. downwards starting from the middle of the slot + // On little-endian machine, we pick 1. for right shifts and 2. for left-shift + // and vice versa on big-endian machine. + bool WillIndexUpwards = N->getOpcode() != ISD::SHL; + if (DAG.getDataLayout().isBigEndian()) + WillIndexUpwards = !WillIndexUpwards; + + SDValue AdjStackPtr; + if (WillIndexUpwards) { + AdjStackPtr = StackPtr; + } else { + AdjStackPtr = DAG.getMemBasePlusOffset( + StackPtr, DAG.getConstant(VTByteWidth, dl, PtrTy), dl); + ByteOffset = DAG.getNegative(ByteOffset, dl, ShAmtVT); + } + + // Get the pointer somewhere into the stack slot from which we need to load. + ByteOffset = DAG.getSExtOrTrunc(ByteOffset, dl, PtrTy); + AdjStackPtr = DAG.getMemBasePlusOffset(AdjStackPtr, ByteOffset, dl); + + // And load it! While the load is not legal, legalizing it is obvious. 
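Putting the steps above together (the remaining sub-byte shift is applied just below), the shift-through-stack expansion behaves like the following standalone model, written here for a logical right shift on a little-endian layout, with a plain uint64_t standing in for the wide illegal type so the sketch stays runnable:

#include <cassert>
#include <cstdint>
#include <cstring>

uint64_t lshrThroughStack(uint64_t X, unsigned ShAmt) {  // ShAmt < 64
  uint8_t Slot[16] = {};                  // stack slot, 2x the value's width
  std::memcpy(Slot, &X, sizeof(X));       // zero-extended spill (SRL case)

  unsigned ByteOffset = (ShAmt / 8) & 7;  // byte part of the shift, clamped
  uint64_t Res;
  std::memcpy(&Res, Slot + ByteOffset, sizeof(Res));  // the offset reload

  return Res >> (ShAmt % 8);              // leftover sub-byte shift
}

int main() {
  for (unsigned S = 0; S < 64; ++S)
    assert(lshrThroughStack(0x0123456789ABCDEFULL, S) ==
           0x0123456789ABCDEFULL >> S);
  return 0;
}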
+ SDValue Res = DAG.getLoad( + VT, dl, Ch, AdjStackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), Align(1)); + // We've performed the shift by a CHAR_BIT * [_ShAmt / CHAR_BIT_] + + // If we may still have a less-than-CHAR_BIT to shift by, do so now. + if (!ShiftByByteMultiple) { + SDValue ShAmtRem = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt, + DAG.getConstant(7, dl, ShAmtVT)); + Res = DAG.getNode(N->getOpcode(), dl, VT, Res, ShAmtRem); + } + + // Finally, split the computed value. + SplitInteger(Res, Lo, Hi); +} + void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -4181,7 +4280,24 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || Action == TargetLowering::Custom; - if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) { + unsigned ExpansionFactor = 1; + // That VT->NVT expansion is one step. But will we re-expand NVT? + for (EVT TmpVT = NVT;;) { + EVT NewTMPVT = TLI.getTypeToTransformTo(*DAG.getContext(), TmpVT); + if (NewTMPVT == TmpVT) + break; + TmpVT = NewTMPVT; + ++ExpansionFactor; + } + + TargetLowering::ShiftLegalizationStrategy S = + TLI.preferredShiftLegalizationStrategy(DAG, N, ExpansionFactor); + + if (S == TargetLowering::ShiftLegalizationStrategy::ExpandThroughStack) + return ExpandIntRes_ShiftThroughStack(N, Lo, Hi); + + if (LegalOrCustom && + S != TargetLowering::ShiftLegalizationStrategy::LowerToLibcall) { // Expand the subcomponents. SDValue LHSL, LHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); @@ -4330,14 +4446,6 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, LC = RTLIB::SREM_I64; else if (VT == MVT::i128) LC = RTLIB::SREM_I128; - - else { - SDValue Result = - ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT); - SplitInteger(Result, Lo, Hi); - return; - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4428,7 +4536,10 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, else if (VT == MVT::i128) LC = RTLIB::MULO_I128; - if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) { + // If we don't have the libcall or if the function we are compiling is the + // implementation of the expected libcall (avoid inf-loop), expand inline. + if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC) || + TLI.getLibcallName(LC) == DAG.getMachineFunction().getName()) { // FIXME: This is not an optimal expansion, but better than crashing. EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2); @@ -4504,6 +4615,22 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, return; } + // Try to expand UDIV by constant. + if (isa<ConstantSDNode>(N->getOperand(1))) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + // Only if the new type is legal. 
+ if (isTypeLegal(NVT)) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + SmallVector<SDValue> Result; + if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) { + Lo = Result[0]; + Hi = Result[1]; + return; + } + } + } + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) LC = RTLIB::UDIV_I16; @@ -4513,14 +4640,6 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, LC = RTLIB::UDIV_I64; else if (VT == MVT::i128) LC = RTLIB::UDIV_I128; - - else { - SDValue Result = - ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT); - SplitInteger(Result, Lo, Hi); - return; - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -4539,6 +4658,22 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, return; } + // Try to expand UREM by constant. + if (isa<ConstantSDNode>(N->getOperand(1))) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + // Only if the new type is legal. + if (isTypeLegal(NVT)) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + SmallVector<SDValue> Result; + if (TLI.expandDIVREMByConstant(N, Result, NVT, DAG, InL, InH)) { + Lo = Result[0]; + Hi = Result[1]; + return; + } + } + } + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) LC = RTLIB::UREM_I16; @@ -4548,14 +4683,6 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, LC = RTLIB::UREM_I64; else if (VT == MVT::i128) LC = RTLIB::UREM_I128; - - else { - SDValue Result = - ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT); - SplitInteger(Result, Lo, Hi); - return; - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); TargetLowering::MakeLibCallOptions CallOptions; @@ -5294,7 +5421,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask); } - SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -5352,7 +5478,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_STEP_VECTOR(SDNode *N) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); assert(NOutVT.isScalableVector() && "Type must be promoted to a scalable vector type"); - APInt StepVal = cast<ConstantSDNode>(N->getOperand(0))->getAPIntValue(); + const APInt &StepVal = N->getConstantOperandAPInt(0); return DAG.getStepVector(dl, NOutVT, StepVal.sext(NOutVT.getScalarSizeInBits())); } diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 8fe9a83b9c3d..5e0349593139 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -722,9 +722,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && +#ifndef NDEBUG + EVT VT = Result.getValueType(); + LLVMContext &Ctx = *DAG.getContext(); + assert((VT == EVT::getIntegerVT(Ctx, 80) || + VT == TLI.getTypeToTransformTo(Ctx, Op.getValueType())) && "Invalid type for softened float"); +#endif AnalyzeNewValue(Result); auto &OpIdEntry = SoftenedFloats[getTableId(Op)]; @@ -759,7 +763,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { // a constant i8 operand. 
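The new UDIV/UREM-by-constant paths above defer the real work to TLI.expandDIVREMByConstant. As a loose illustration of why a constant divisor makes an inline expansion feasible at all (this shows only the underlying modular-arithmetic identity, not the algorithm that helper actually uses): the remainder of a two-half value can be recombined from the halves and the precomputable constant 2^32 mod C.

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Hi = 0xDEADBEEF, Lo = 0x12345678, C = 10007;
  uint64_t X = (Hi << 32) | Lo;

  uint64_t TwoPow32ModC = (1ULL << 32) % C;   // fixed at compile time for a
                                              // constant divisor
  uint64_t Rem = ((Hi % C) * TwoPow32ModC + Lo % C) % C;
  assert(Rem == X % C);
  return 0;
}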
// We don't currently support the scalarization of scalable vector types. - assert(Result.getValueSizeInBits().getFixedSize() >= + assert(Result.getValueSizeInBits().getFixedValue() >= Op.getScalarValueSizeInBits() && "Invalid type for scalarized vector"); AnalyzeNewValue(Result); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 6696b79cf885..b97e44a01319 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -324,7 +324,7 @@ private: SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); SDValue PromoteIntRes_FP_TO_XINT_SAT(SDNode *N); - SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); + SDValue PromoteIntRes_FP_TO_FP16_BF16(SDNode *N); SDValue PromoteIntRes_FREEZE(SDNode *N); SDValue PromoteIntRes_INT_EXTEND(SDNode *N); SDValue PromoteIntRes_LOAD(LoadSDNode *N); @@ -354,7 +354,7 @@ private: SDValue PromoteIntRes_ADDSUBSHLSAT(SDNode *N); SDValue PromoteIntRes_MULFIX(SDNode *N); SDValue PromoteIntRes_DIVFIX(SDNode *N); - SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N); + SDValue PromoteIntRes_GET_ROUNDING(SDNode *N); SDValue PromoteIntRes_VECREDUCE(SDNode *N); SDValue PromoteIntRes_VP_REDUCE(SDNode *N); SDValue PromoteIntRes_ABS(SDNode *N); @@ -437,11 +437,11 @@ private: void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_GET_ROUNDING (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_FP_TO_XINT_SAT (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_LLROUND_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XROUND_XRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -457,6 +457,7 @@ private: void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ShiftThroughStack (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_MINMAX (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -862,6 +863,8 @@ private: void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, + SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi, bool SplitSETCC = false); @@ -891,6 +894,7 @@ private: SDValue SplitVecOp_ExtVecInRegOp(SDNode *N); SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo); SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo); @@ 
-947,6 +951,7 @@ private: SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); + SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); SDValue WidenVecRes_VP_GATHER(VPGatherSDNode* N); @@ -958,6 +963,7 @@ private: SDValue WidenVecRes_STRICT_FSETCC(SDNode* N); SDValue WidenVecRes_UNDEF(SDNode *N); SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); + SDValue WidenVecRes_VECTOR_REVERSE(SDNode *N); SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); @@ -984,6 +990,7 @@ private: SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); SDValue WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo); + SDValue WidenVecOp_VP_STRIDED_STORE(SDNode *N, unsigned OpNo); SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo); SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index c6885677d644..21b5255c8f72 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -197,8 +197,7 @@ void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, SDValue &Hi) { GetExpandedOp(N->getOperand(0), Lo, Hi); - SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? - Hi : Lo; + SDValue Part = N->getConstantOperandVal(1) ? Hi : Lo; assert(Part.getValueType() == N->getValueType(0) && "Type twice as big as expanded type not itself expanded!"); @@ -209,7 +208,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue OldVec = N->getOperand(0); - unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + ElementCount OldEltCount = OldVec.getValueType().getVectorElementCount(); EVT OldEltVT = OldVec.getValueType().getVectorElementType(); SDLoc dl(N); @@ -223,14 +222,13 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // the input vector. If so, extend the elements of the input vector to the // same bitwidth as the result before expanding. assert(OldEltVT.bitsLT(OldVT) && "Result type smaller then element type!"); - EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts); + EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldEltCount); OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0)); } - SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, - EVT::getVectorVT(*DAG.getContext(), - NewVT, 2*OldElts), - OldVec); + SDValue NewVec = DAG.getNode( + ISD::BITCAST, dl, + EVT::getVectorVT(*DAG.getContext(), NewVT, OldEltCount * 2), OldVec); // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. 
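The EXTRACT_VECTOR_ELT expansion described above (and performed just below) amounts to reinterpreting the vector with twice as many half-width elements and reading the pair at 2 * Idx and 2 * Idx + 1. A standalone model, with the Lo/Hi order shown for little-endian (a big-endian target would take the pair in the opposite order):

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t Vec[2] = {0x1111222233334444ULL, 0x5555666677778888ULL};
  uint32_t AsI32[4];
  std::memcpy(AsI32, Vec, sizeof(Vec));     // the "bitcast" to <4 x i32>

  unsigned Idx = 1;
  uint32_t Lo = AsI32[2 * Idx];             // element 2 * Idx
  uint32_t Hi = AsI32[2 * Idx + 1];         // element 2 * Idx + 1
  assert(((static_cast<uint64_t>(Hi) << 32) | Lo) == Vec[Idx]);
  return 0;
}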
SDValue Idx = N->getOperand(1); @@ -359,8 +357,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { SmallVector<SDValue, 8> Ops; IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - SDValue Vec = - DAG.getBuildVector(NVT, dl, makeArrayRef(Ops.data(), NumElts)); + SDValue Vec = DAG.getBuildVector(NVT, dl, ArrayRef(Ops.data(), NumElts)); return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } @@ -403,7 +400,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { SDValue Lo, Hi; GetExpandedOp(N->getOperand(0), Lo, Hi); - return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo; + return N->getConstantOperandVal(1) ? Hi : Lo; } SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index f5a1eae1e7fe..e245b3cb4c6d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -132,6 +132,7 @@ class VectorLegalizer { SDValue ExpandVSELECT(SDNode *Node); SDValue ExpandVP_SELECT(SDNode *Node); SDValue ExpandVP_MERGE(SDNode *Node); + SDValue ExpandVP_REM(SDNode *Node); SDValue ExpandSELECT(SDNode *Node); std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); SDValue ExpandStore(SDNode *N); @@ -492,7 +493,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (LowerOperationWrapper(Node, ResultVals)) break; LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); - LLVM_FALLTHROUGH; + [[fallthrough]]; case TargetLowering::Expand: LLVM_DEBUG(dbgs() << "Expanding\n"); Expand(Node, ResultVals); @@ -594,7 +595,8 @@ void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) - Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, DAG.getIntPtrConstant(0, dl)); + Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); else Res = DAG.getNode(ISD::BITCAST, dl, VT, Res); @@ -728,12 +730,22 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::BSWAP: Results.push_back(ExpandBSWAP(Node)); return; + case ISD::VP_BSWAP: + Results.push_back(TLI.expandVPBSWAP(Node, DAG)); + return; case ISD::VSELECT: Results.push_back(ExpandVSELECT(Node)); return; case ISD::VP_SELECT: Results.push_back(ExpandVP_SELECT(Node)); return; + case ISD::VP_SREM: + case ISD::VP_UREM: + if (SDValue Expanded = ExpandVP_REM(Node)) { + Results.push_back(Expanded); + return; + } + break; case ISD::SELECT: Results.push_back(ExpandSELECT(Node)); return; @@ -776,12 +788,24 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { case ISD::BITREVERSE: ExpandBITREVERSE(Node, Results); return; + case ISD::VP_BITREVERSE: + if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::CTPOP: if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) { Results.push_back(Expanded); return; } break; + case ISD::VP_CTPOP: + if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::CTLZ: case ISD::CTLZ_ZERO_UNDEF: if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) { @@ -789,6 +813,13 @@ void VectorLegalizer::Expand(SDNode 
*Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::VP_CTLZ: + case ISD::VP_CTLZ_ZERO_UNDEF: + if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::CTTZ: case ISD::CTTZ_ZERO_UNDEF: if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) { @@ -796,8 +827,17 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::VP_CTTZ: + case ISD::VP_CTTZ_ZERO_UNDEF: + if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::FSHL: + case ISD::VP_FSHL: case ISD::FSHR: + case ISD::VP_FSHR: if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) { Results.push_back(Expanded); return; @@ -847,6 +887,13 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { return; } break; + case ISD::USHLSAT: + case ISD::SSHLSAT: + if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) { + Results.push_back(Expanded); + return; + } + break; case ISD::FP_TO_SINT_SAT: case ISD::FP_TO_UINT_SAT: // Expand the fpsosisat if it is scalable to prevent it from unrolling below. @@ -954,10 +1001,7 @@ SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { DAG.getConstant(0, DL, BitTy)); // Broadcast the mask so that the entire vector is all one or all zero. - if (VT.isFixedLengthVector()) - Mask = DAG.getSplatBuildVector(MaskTy, DL, Mask); - else - Mask = DAG.getSplatVector(MaskTy, DL, Mask); + Mask = DAG.getSplat(MaskTy, DL, Mask); // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because @@ -1300,8 +1344,7 @@ SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { return DAG.UnrollVectorOp(Node); SDValue StepVec = DAG.getStepVector(DL, EVLVecVT); - SDValue SplatEVL = IsFixedLen ? DAG.getSplatBuildVector(EVLVecVT, DL, EVL) - : DAG.getSplatVector(EVLVecVT, DL, EVL); + SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL); SDValue EVLMask = DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT); @@ -1309,6 +1352,30 @@ SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2); } +SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) { + // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB. + EVT VT = Node->getValueType(0); + + unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV; + + if (!TLI.isOperationLegalOrCustom(DivOpc, VT) || + !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) || + !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT)) + return SDValue(); + + SDLoc DL(Node); + + SDValue Dividend = Node->getOperand(0); + SDValue Divisor = Node->getOperand(1); + SDValue Mask = Node->getOperand(2); + SDValue EVL = Node->getOperand(3); + + // X % Y -> X-X/Y*Y + SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL); + SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL); + return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL); +} + void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results) { // Attempt to expand using TargetLowering. 
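The ExpandVP_REM helper added above leans on the usual identity x % y == x - (x / y) * y, applied lane-wise with the same mask and EVL on each of the three VP nodes. A scalar sanity check of that identity (C++ integer division truncates toward zero, matching sdiv/srem semantics):

#include <cassert>
#include <cstdint>

int main() {
  int64_t Xs[] = {7, -7, 123456, -1};
  int64_t Ys[] = {3, 3, -7, 5};
  for (int I = 0; I != 4; ++I) {
    int64_t X = Xs[I], Y = Ys[I];
    assert(X % Y == X - (X / Y) * Y);   // srem via sdiv + mul + sub
  }
  return 0;
}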
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 143abc08eeea..af5ea1ce5f45 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -27,6 +27,8 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TypeSize.h" #include "llvm/Support/raw_ostream.h" +#include <numeric> + using namespace llvm; #define DEBUG_TYPE "legalize-types" @@ -975,6 +977,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_LOAD: SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi); break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi); + break; case ISD::MLOAD: SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); break; @@ -1006,23 +1011,34 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::ABS: + case ISD::VP_ABS: case ISD::BITREVERSE: + case ISD::VP_BITREVERSE: case ISD::BSWAP: + case ISD::VP_BSWAP: case ISD::CTLZ: + case ISD::VP_CTLZ: case ISD::CTTZ: + case ISD::VP_CTTZ: case ISD::CTLZ_ZERO_UNDEF: + case ISD::VP_CTLZ_ZERO_UNDEF: case ISD::CTTZ_ZERO_UNDEF: + case ISD::VP_CTTZ_ZERO_UNDEF: case ISD::CTPOP: - case ISD::FABS: + case ISD::VP_CTPOP: + case ISD::FABS: case ISD::VP_FABS: case ISD::FCEIL: + case ISD::VP_FCEIL: case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: case ISD::FFLOOR: + case ISD::VP_FFLOOR: case ISD::FLOG: case ISD::FLOG10: case ISD::FLOG2: case ISD::FNEARBYINT: + case ISD::VP_FNEARBYINT: case ISD::FNEG: case ISD::VP_FNEG: case ISD::FREEZE: case ISD::ARITH_FENCE: @@ -1031,21 +1047,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::VP_FP_ROUND: case ISD::FP_TO_SINT: - case ISD::VP_FPTOSI: + case ISD::VP_FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::VP_FPTOUI: + case ISD::VP_FP_TO_UINT: case ISD::FRINT: + case ISD::VP_FRINT: case ISD::FROUND: + case ISD::VP_FROUND: case ISD::FROUNDEVEN: + case ISD::VP_FROUNDEVEN: case ISD::FSIN: - case ISD::FSQRT: + case ISD::FSQRT: case ISD::VP_SQRT: case ISD::FTRUNC: + case ISD::VP_FROUNDTOZERO: case ISD::SINT_TO_FP: - case ISD::VP_SITOFP: + case ISD::VP_SINT_TO_FP: case ISD::TRUNCATE: case ISD::VP_TRUNCATE: case ISD::UINT_TO_FP: - case ISD::VP_UITOFP: + case ISD::VP_UINT_TO_FP: case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -1066,8 +1086,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FADD: case ISD::VP_FADD: case ISD::FSUB: case ISD::VP_FSUB: case ISD::FMUL: case ISD::VP_FMUL: - case ISD::FMINNUM: - case ISD::FMAXNUM: + case ISD::FMINNUM: case ISD::VP_FMINNUM: + case ISD::FMAXNUM: case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::FMAXIMUM: case ISD::SDIV: case ISD::VP_SDIV: @@ -1083,10 +1103,10 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::UREM: case ISD::VP_UREM: case ISD::SREM: case ISD::VP_SREM: case ISD::FREM: case ISD::VP_FREM: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: + case ISD::SMIN: case ISD::VP_SMIN: + case ISD::SMAX: case ISD::VP_SMAX: + case ISD::UMIN: case ISD::VP_UMIN: + case ISD::UMAX: case ISD::VP_UMAX: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -1095,11 +1115,14 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::USHLSAT: case ISD::ROTL: case ISD::ROTR: + case ISD::VP_FCOPYSIGN: SplitVecRes_BinOp(N, Lo, Hi); break; case ISD::FMA: 
case ISD::VP_FMA: case ISD::FSHL: + case ISD::VP_FSHL: case ISD::FSHR: + case ISD::VP_FSHR: SplitVecRes_TernaryOp(N, Lo, Hi); break; @@ -1143,13 +1166,13 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI, SDValue &Ptr, uint64_t *ScaledOffset) { SDLoc DL(N); - unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8; + unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinValue() / 8; if (MemVT.isScalableVector()) { SDNodeFlags Flags; SDValue BytesIncrement = DAG.getVScale( DL, Ptr.getValueType(), - APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize)); + APInt(Ptr.getValueSizeInBits().getFixedValue(), IncrementSize)); MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); Flags.setNoUnsignedWrap(true); if (ScaledOffset) @@ -1465,7 +1488,11 @@ void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, SDLoc DL(N); SDValue ArgLo, ArgHi; SDValue Test = N->getOperand(1); - GetSplitVector(N->getOperand(0), ArgLo, ArgHi); + SDValue FpValue = N->getOperand(0); + if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(FpValue, ArgLo, ArgHi); + else + std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue)); EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); @@ -1900,7 +1927,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace()); else MPI = LD->getPointerInfo().getWithOffset( - LoMemVT.getStoreSize().getFixedSize()); + LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, @@ -1921,6 +1948,87 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, + SDValue &Lo, SDValue &Hi) { + assert(SLD->isUnindexed() && + "Indexed VP strided load during type legalization!"); + assert(SLD->getOffset().isUndef() && + "Unexpected indexed variable-length load offset"); + + SDLoc DL(SLD); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(SLD->getValueType(0)); + + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(SLD->getMemoryVT(), LoVT, &HiIsEmpty); + + SDValue Mask = SLD->getMask(); + SDValue LoMask, HiMask; + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, LoMask, HiMask); + else + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + } + + SDValue LoEVL, HiEVL; + std::tie(LoEVL, HiEVL) = + DAG.SplitEVL(SLD->getVectorLength(), SLD->getValueType(0), DL); + + // Generate the low vp_strided_load + Lo = DAG.getStridedLoadVP( + SLD->getAddressingMode(), SLD->getExtensionType(), LoVT, DL, + SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), SLD->getStride(), + LoMask, LoEVL, LoMemVT, SLD->getMemOperand(), SLD->isExpandingLoad()); + + if (HiIsEmpty) { + // The high vp_strided_load has zero storage size. We therefore simply set + // it to the low vp_strided_load and rely on subsequent removal from the + // chain. + Hi = Lo; + } else { + // Generate the high vp_strided_load. 
+ // To calculate the high base address, we need to sum to the low base + // address stride number of bytes for each element already loaded by low, + // that is: Ptr = Ptr + (LoEVL * Stride) + EVT PtrVT = SLD->getBasePtr().getValueType(); + SDValue Increment = + DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL, + DAG.getSExtOrTrunc(SLD->getStride(), DL, PtrVT)); + SDValue Ptr = + DAG.getNode(ISD::ADD, DL, PtrVT, SLD->getBasePtr(), Increment); + + Align Alignment = SLD->getOriginalAlign(); + if (LoMemVT.isScalableVector()) + Alignment = commonAlignment( + Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()), + MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, + SLD->getAAInfo(), SLD->getRanges()); + + Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(), + HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(), + SLD->getStride(), HiMask, HiEVL, HiMemVT, MMO, + SLD->isExpandingLoad()); + } + + // Build a factor node to remember that this load is independent of the + // other one. + SDValue Ch = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(SLD, 1), Ch); +} + void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi) { assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); @@ -1983,7 +2091,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace()); else MPI = MLD->getPointerInfo().getWithOffset( - LoMemVT.getStoreSize().getFixedSize()); + LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOLoad, MemoryLocation::UnknownSize, Alignment, @@ -2286,13 +2394,13 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // If Lo or Hi uses elements from at most two of the four input vectors, then // express it as a vector shuffle of those two inputs. Otherwise extract the // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. - SmallVector<int> OrigMask(N->getMask().begin(), N->getMask().end()); + SmallVector<int> OrigMask(N->getMask()); // Try to pack incoming shuffles/inputs. auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts, &DL](SmallVectorImpl<int> &Mask) { // Check if all inputs are shuffles of the same operands or non-shuffles. MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs; - for (unsigned Idx = 0; Idx < array_lengthof(Inputs); ++Idx) { + for (unsigned Idx = 0; Idx < std::size(Inputs); ++Idx) { SDValue Input = Inputs[Idx]; auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode()); if (!Shuffle || @@ -2339,7 +2447,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear(); } // Check if any concat_vectors can be simplified. 
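Both the strided-load splitting above and the strided-store splitting added later in this patch use the same address step: the low half consumes LoEVL elements, so the high half must start LoEVL strides (in bytes) past the original base, exactly the Ptr = Ptr + (LoEVL * Stride) computation in the code. A tiny standalone sketch of that arithmetic with made-up numbers (not the DAG node API; how LoEVL itself is produced is left to DAG.SplitEVL):

#include <cassert>
#include <cstdint>

// High-half base address for a split VP strided memory access: the low
// half already covered LoEVL elements, Stride bytes apart.
int64_t highHalfBase(int64_t Base, int64_t LoEVL, int64_t Stride) {
  return Base + LoEVL * Stride;
}

int main() {
  // Example: the low half handles 4 lanes with a 32-byte stride, so the
  // high half must begin 4 * 32 bytes after the original pointer.
  assert(highHalfBase(0x1000, 4, 32) == 0x1000 + 128);
  return 0;
}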
- SmallBitVector UsedSubVector(2 * array_lengthof(Inputs)); + SmallBitVector UsedSubVector(2 * std::size(Inputs)); for (int &Idx : Mask) { if (Idx == UndefMaskElem) continue; @@ -2359,7 +2467,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, } if (UsedSubVector.count() > 1) { SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs; - for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { + for (unsigned I = 0; I < std::size(Inputs); ++I) { if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1)) continue; if (Pairs.empty() || Pairs.back().size() == 2) @@ -2403,7 +2511,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Try to remove extra shuffles (except broadcasts) and shuffles with the // reused operands. Changed = false; - for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { + for (unsigned I = 0; I < std::size(Inputs); ++I) { auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode()); if (!Shuffle) continue; @@ -2495,15 +2603,15 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, NewElts](SmallVectorImpl<int> &Mask) { SetVector<SDValue> UniqueInputs; SetVector<SDValue> UniqueConstantInputs; - for (unsigned I = 0; I < array_lengthof(Inputs); ++I) { - if (IsConstant(Inputs[I])) - UniqueConstantInputs.insert(Inputs[I]); - else if (!Inputs[I].isUndef()) - UniqueInputs.insert(Inputs[I]); + for (const auto &I : Inputs) { + if (IsConstant(I)) + UniqueConstantInputs.insert(I); + else if (!I.isUndef()) + UniqueInputs.insert(I); } // Adjust mask in case of reused inputs. Also, need to insert constant // inputs at first, otherwise it affects the final outcome. - if (UniqueInputs.size() != array_lengthof(Inputs)) { + if (UniqueInputs.size() != std::size(Inputs)) { auto &&UniqueVec = UniqueInputs.takeVector(); auto &&UniqueConstantVec = UniqueConstantInputs.takeVector(); unsigned ConstNum = UniqueConstantVec.size(); @@ -2541,8 +2649,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Build a shuffle mask for the output, discovering on the fly which // input vectors to use as shuffle operands. 
unsigned FirstMaskIdx = High * NewElts; - SmallVector<int> Mask(NewElts * array_lengthof(Inputs), UndefMaskElem); - copy(makeArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin()); + SmallVector<int> Mask(NewElts * std::size(Inputs), UndefMaskElem); + copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin()); assert(!Output && "Expected default initialized initial value."); TryPeekThroughShufflesInputs(Mask); MakeUniqueInputs(Mask); @@ -2561,7 +2669,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, return SecondIteration; }; processShuffleMasks( - Mask, array_lengthof(Inputs), array_lengthof(Inputs), + Mask, std::size(Inputs), std::size(Inputs), /*NumOfUsedRegs=*/1, [&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); }, [&Output, &DAG = DAG, NewVT, &DL, &Inputs, @@ -2707,6 +2815,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::VP_STORE: Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = SplitVecOp_VP_STRIDED_STORE(cast<VPStridedStoreSDNode>(N), OpNo); + break; case ISD::MSTORE: Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); break; @@ -2725,6 +2836,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_UINT_TO_FP: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::VP_SINT_TO_FP: + case ISD::VP_UINT_TO_FP: if (N->getValueType(0).bitsLT( N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType())) Res = SplitVecOp_TruncateHelper(N); @@ -2737,6 +2850,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { break; case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::VP_FP_TO_SINT: + case ISD::VP_FP_TO_UINT: case ISD::STRICT_FP_TO_SINT: case ISD::STRICT_FP_TO_UINT: case ISD::STRICT_FP_EXTEND: @@ -2999,29 +3114,57 @@ SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); - SDValue Idx = N->getOperand(1); SDLoc dl(N); SDValue Lo, Hi; - if (SubVT.isScalableVector() != - N->getOperand(0).getValueType().isScalableVector()) - report_fatal_error("Extracting a fixed-length vector from an illegal " - "scalable vector is not yet supported"); - GetSplitVector(N->getOperand(0), Lo, Hi); - uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); + uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements(); uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); - if (IdxVal < LoElts) { - assert(IdxVal + SubVT.getVectorMinNumElements() <= LoElts && + if (IdxVal < LoEltsMin) { + assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin && "Extracted subvector crosses vector split!"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); - } else { + } else if (SubVT.isScalableVector() == + N->getOperand(0).getValueType().isScalableVector()) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, - DAG.getVectorIdxConstant(IdxVal - LoElts, dl)); - } + DAG.getVectorIdxConstant(IdxVal - LoEltsMin, dl)); + + // After this point the DAG node only permits extracting fixed-width + // subvectors from scalable vectors. + assert(SubVT.isFixedLengthVector() && + "Extracting scalable subvector from fixed-width unsupported"); + + // If the element type is i1 and we're not promoting the result, then we may + // end up loading the wrong data since the bits are packed tightly into + // bytes. 
For example, if we extract a v4i1 (legal) from a nxv4i1 (legal) + // type at index 4, then we will load a byte starting at index 0. + if (SubVT.getScalarType() == MVT::i1) + report_fatal_error("Don't know how to extract fixed-width predicate " + "subvector from a scalable predicate vector"); + + // Spill the vector to the stack. We should use the alignment for + // the smallest part. + SDValue Vec = N->getOperand(0); + EVT VecVT = Vec.getValueType(); + Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); + SDValue StackPtr = + DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); + auto &MF = DAG.getMachineFunction(); + auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); + + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + SmallestAlign); + + // Extract the subvector by loading the correct part. + StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVT, Idx); + + return DAG.getLoad( + SubVT, dl, Store, StackPtr, + MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); } SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -3029,8 +3172,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Idx = N->getOperand(1); EVT VecVT = Vec.getValueType(); - if (isa<ConstantSDNode>(Idx)) { - uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (const ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Idx)) { + uint64_t IdxVal = Index->getZExtValue(); SDValue Lo, Hi; GetSplitVector(Vec, Lo, Hi); @@ -3167,11 +3310,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) { MachinePointerInfo MPI; if (LoMemVT.isScalableVector()) { Alignment = commonAlignment(Alignment, - LoMemVT.getSizeInBits().getKnownMinSize() / 8); + LoMemVT.getSizeInBits().getKnownMinValue() / 8); MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); } else MPI = N->getPointerInfo().getWithOffset( - LoMemVT.getStoreSize().getFixedSize()); + LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, @@ -3186,6 +3329,80 @@ SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) { return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } +SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, + unsigned OpNo) { + assert(N->isUnindexed() && "Indexed vp_strided_store of a vector?"); + assert(N->getOffset().isUndef() && "Unexpected VP strided store offset"); + + SDLoc DL(N); + + SDValue Data = N->getValue(); + SDValue LoData, HiData; + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Data, LoData, HiData); + else + std::tie(LoData, HiData) = DAG.SplitVector(Data, DL); + + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = DAG.GetDependentSplitDestVTs( + N->getMemoryVT(), LoData.getValueType(), &HiIsEmpty); + + SDValue Mask = N->getMask(); + SDValue LoMask, HiMask; + if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) + SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask); + else if (getTypeAction(Mask.getValueType()) == + TargetLowering::TypeSplitVector) + GetSplitVector(Mask, LoMask, HiMask); + else + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + + SDValue LoEVL, HiEVL; + std::tie(LoEVL, HiEVL) = + DAG.SplitEVL(N->getVectorLength(), Data.getValueType(), 
DL); + + // Generate the low vp_strided_store + SDValue Lo = DAG.getStridedStoreVP( + N->getChain(), DL, LoData, N->getBasePtr(), N->getOffset(), + N->getStride(), LoMask, LoEVL, LoMemVT, N->getMemOperand(), + N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); + + // If the high vp_strided_store has zero storage size, only the low + // vp_strided_store is needed. + if (HiIsEmpty) + return Lo; + + // Generate the high vp_strided_store. + // To calculate the high base address, we need to sum to the low base + // address stride number of bytes for each element already stored by low, + // that is: Ptr = Ptr + (LoEVL * Stride) + EVT PtrVT = N->getBasePtr().getValueType(); + SDValue Increment = + DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL, + DAG.getSExtOrTrunc(N->getStride(), DL, PtrVT)); + SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, N->getBasePtr(), Increment); + + Align Alignment = N->getOriginalAlign(); + if (LoMemVT.isScalableVector()) + Alignment = commonAlignment(Alignment, + LoMemVT.getSizeInBits().getKnownMinValue() / 8); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(N->getPointerInfo().getAddrSpace()), + MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, + N->getAAInfo(), N->getRanges()); + + SDValue Hi = DAG.getStridedStoreVP( + N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask, + HiEVL, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(), + N->isCompressingStore()); + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo) { assert(N->isUnindexed() && "Indexed masked store of vector?"); @@ -3243,11 +3460,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, MachinePointerInfo MPI; if (LoMemVT.isScalableVector()) { Alignment = commonAlignment( - Alignment, LoMemVT.getSizeInBits().getKnownMinSize() / 8); + Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8); MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); } else MPI = N->getPointerInfo().getWithOffset( - LoMemVT.getStoreSize().getFixedSize()); + LoMemVT.getStoreSize().getFixedValue()); MMO = DAG.getMachineFunction().getMachineMemOperand( MPI, MachineMemOperand::MOStore, MemoryLocation::UnknownSize, Alignment, @@ -3593,7 +3810,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { // The result (and the first input) has a legal vector type, but the second // input needs splitting. 
- return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); + + SDLoc DL(N); + + EVT LHSLoVT, LHSHiVT; + std::tie(LHSLoVT, LHSHiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + if (!isTypeLegal(LHSLoVT) || !isTypeLegal(LHSHiVT)) + return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); + + SDValue LHSLo, LHSHi; + std::tie(LHSLo, LHSHi) = + DAG.SplitVector(N->getOperand(0), DL, LHSLoVT, LHSHiVT); + + SDValue RHSLo, RHSHi; + std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL); + + SDValue Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLoVT, LHSLo, RHSLo); + SDValue Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHiVT, LHSHi, RHSHi); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) { @@ -3683,6 +3919,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_LOAD: Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N)); break; + case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: + Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N)); + break; case ISD::MLOAD: Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); break; @@ -3692,6 +3931,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_GATHER: Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N)); break; + case ISD::VECTOR_REVERSE: + Res = WidenVecRes_VECTOR_REVERSE(N); + break; case ISD::ADD: case ISD::VP_ADD: case ISD::AND: case ISD::VP_AND: @@ -3704,14 +3946,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::VP_SHL: case ISD::SRA: case ISD::VP_ASHR: case ISD::SRL: case ISD::VP_LSHR: - case ISD::FMINNUM: - case ISD::FMAXNUM: + case ISD::FMINNUM: case ISD::VP_FMINNUM: + case ISD::FMAXNUM: case ISD::VP_FMAXNUM: case ISD::FMINIMUM: case ISD::FMAXIMUM: - case ISD::SMIN: - case ISD::SMAX: - case ISD::UMIN: - case ISD::UMAX: + case ISD::SMIN: case ISD::VP_SMIN: + case ISD::SMAX: case ISD::VP_SMAX: + case ISD::UMIN: case ISD::VP_UMIN: + case ISD::UMAX: case ISD::VP_UMAX: case ISD::UADDSAT: case ISD::SADDSAT: case ISD::USUBSAT: @@ -3738,6 +3980,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_FMUL: case ISD::VP_FDIV: case ISD::VP_FREM: + case ISD::VP_FCOPYSIGN: Res = WidenVecRes_Binary(N); break; @@ -3748,7 +3991,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { // If the target has custom/legal support for the scalar FP intrinsic ops // (they are probably not destined to become libcalls), then widen those // like any other binary ops. - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::FADD: case ISD::FMUL: @@ -3809,17 +4052,17 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_ROUND: case ISD::VP_FP_ROUND: case ISD::FP_TO_SINT: - case ISD::VP_FPTOSI: + case ISD::VP_FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::VP_FPTOUI: + case ISD::VP_FP_TO_UINT: case ISD::SIGN_EXTEND: case ISD::VP_SIGN_EXTEND: case ISD::SINT_TO_FP: - case ISD::VP_SITOFP: + case ISD::VP_SINT_TO_FP: case ISD::VP_TRUNCATE: case ISD::TRUNCATE: case ISD::UINT_TO_FP: - case ISD::VP_UITOFP: + case ISD::VP_UINT_TO_FP: case ISD::ZERO_EXTEND: case ISD::VP_ZERO_EXTEND: Res = WidenVecRes_Convert(N); @@ -3851,17 +4094,34 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { // If the target has custom/legal support for the scalar FP intrinsic ops // (they are probably not destined to become libcalls), then widen those // like any other unary ops. 
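Splitting FCOPYSIGN as above is safe because the operation is purely lane-wise: splitting the magnitude and sign operands at the same point and concatenating the two half results reproduces the unsplit answer. A small reference check of that property, using std::copysign for the scalar semantics and hypothetical helper names:

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Lane-wise copysign over a whole vector.
std::vector<double> fcopysignVec(const std::vector<double> &Mag,
                                 const std::vector<double> &Sgn) {
  std::vector<double> R(Mag.size());
  for (std::size_t I = 0; I < Mag.size(); ++I)
    R[I] = std::copysign(Mag[I], Sgn[I]);
  return R;
}

int main() {
  std::vector<double> Mag = {1.0, -2.0, 3.0, -4.0};
  std::vector<double> Sgn = {-0.0, 1.0, -5.0, 2.0};
  std::vector<double> Whole = fcopysignVec(Mag, Sgn);

  // Split both operands, apply the op per half, then concatenate.
  std::vector<double> Lo = fcopysignVec({Mag[0], Mag[1]}, {Sgn[0], Sgn[1]});
  std::vector<double> Hi = fcopysignVec({Mag[2], Mag[3]}, {Sgn[2], Sgn[3]});
  std::vector<double> Concat(Lo);
  Concat.insert(Concat.end(), Hi.begin(), Hi.end());

  assert(Whole == Concat);
  return 0;
}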
- LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ABS: + case ISD::VP_ABS: case ISD::BITREVERSE: + case ISD::VP_BITREVERSE: case ISD::BSWAP: + case ISD::VP_BSWAP: case ISD::CTLZ: + case ISD::VP_CTLZ: case ISD::CTLZ_ZERO_UNDEF: + case ISD::VP_CTLZ_ZERO_UNDEF: case ISD::CTPOP: + case ISD::VP_CTPOP: case ISD::CTTZ: + case ISD::VP_CTTZ: case ISD::CTTZ_ZERO_UNDEF: + case ISD::VP_CTTZ_ZERO_UNDEF: case ISD::FNEG: case ISD::VP_FNEG: + case ISD::VP_FABS: + case ISD::VP_SQRT: + case ISD::VP_FCEIL: + case ISD::VP_FFLOOR: + case ISD::VP_FRINT: + case ISD::VP_FNEARBYINT: + case ISD::VP_FROUND: + case ISD::VP_FROUNDEVEN: + case ISD::VP_FROUNDTOZERO: case ISD::FREEZE: case ISD::ARITH_FENCE: case ISD::FCANONICALIZE: @@ -3869,7 +4129,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { break; case ISD::FMA: case ISD::VP_FMA: case ISD::FSHL: + case ISD::VP_FSHL: case ISD::FSHR: + case ISD::VP_FSHR: Res = WidenVecRes_Ternary(N); break; } @@ -4005,7 +4267,7 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, ConcatOps[j] = UndefVal; } return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - makeArrayRef(ConcatOps.data(), NumOps)); + ArrayRef(ConcatOps.data(), NumOps)); } SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { @@ -4480,8 +4742,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_IS_FPCLASS(SDNode *N) { + SDValue FpValue = N->getOperand(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Arg = GetWidenedVector(N->getOperand(0)); + if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + SDValue Arg = GetWidenedVector(FpValue); return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)}, N->getFlags()); } @@ -4585,33 +4850,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { unsigned WidenSize = WidenVT.getSizeInBits(); unsigned InSize = InVT.getSizeInBits(); + unsigned InScalarSize = InVT.getScalarSizeInBits(); // x86mmx is not an acceptable vector element type, so don't try. - if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { + if (WidenSize % InScalarSize == 0 && InVT != MVT::x86mmx) { // Determine new input vector type. The new input vector type will use // the same element type (if its a vector) or use the input type as a // vector. It is the same size as the type to widen to. EVT NewInVT; - unsigned NewNumElts = WidenSize / InSize; + unsigned NewNumParts = WidenSize / InSize; if (InVT.isVector()) { EVT InEltVT = InVT.getVectorElementType(); NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenSize / InEltVT.getSizeInBits()); } else { - NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumParts); } if (TLI.isTypeLegal(NewInVT)) { SDValue NewVec; if (InVT.isVector()) { // Because the result and the input are different vector types, widening - // the result could create a legal type but widening the input might make - // it an illegal type that might lead to repeatedly splitting the input - // and then widening it. To avoid this, we widen the input only if + // the result could create a legal type but widening the input might + // make it an illegal type that might lead to repeatedly splitting the + // input and then widening it. To avoid this, we widen the input only if // it results in a legal type. 
- SmallVector<SDValue, 16> Ops(NewNumElts, DAG.getUNDEF(InVT)); - Ops[0] = InOp; + if (WidenSize % InSize == 0) { + SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT)); + Ops[0] = InOp; - NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); + } else { + SmallVector<SDValue, 16> Ops; + DAG.ExtractVectorElements(InOp, Ops); + Ops.append(WidenSize / InScalarSize - Ops.size(), + DAG.getUNDEF(InVT.getVectorElementType())); + + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); + } } else { NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp); } @@ -4768,7 +5043,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { // nxv2i64 extract_subvector(nxv16i64, 8) // nxv2i64 extract_subvector(nxv16i64, 10) // undef) - unsigned GCD = greatestCommonDivisor(VTNumElts, WidenNumElts); + unsigned GCD = std::gcd(VTNumElts, WidenNumElts); assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " "down type's element count"); EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, @@ -4915,6 +5190,33 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) { + SDLoc DL(N); + + // The mask should be widened as well + SDValue Mask = N->getMask(); + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided load"); + Mask = GetWidenedVector(Mask); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(Mask.getValueType().getVectorElementCount() == + WidenVT.getVectorElementCount() && + "Data and mask vectors should have the same number of elements"); + + SDValue Res = DAG.getStridedLoadVP( + N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(), + N->getBasePtr(), N->getOffset(), N->getStride(), Mask, + N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(), + N->isExpandingLoad()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); @@ -5316,6 +5618,61 @@ SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); } +SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) { + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue OpValue = GetWidenedVector(N->getOperand(0)); + assert(WidenVT == OpValue.getValueType() && "Unexpected widened vector type"); + + SDValue ReverseVal = DAG.getNode(ISD::VECTOR_REVERSE, dl, WidenVT, OpValue); + unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); + unsigned VTNumElts = VT.getVectorMinNumElements(); + unsigned IdxVal = WidenNumElts - VTNumElts; + + if (VT.isScalableVector()) { + // Try to split the 'Widen ReverseVal' into smaller extracts and concat the + // results together, e.g.(nxv6i64 -> nxv8i64) + // nxv8i64 vector_reverse + // <-> + // nxv8i64 concat( + // nxv2i64 extract_subvector(nxv8i64, 2) + // nxv2i64 extract_subvector(nxv8i64, 4) + // nxv2i64 extract_subvector(nxv8i64, 6) + // nxv2i64 undef) + + unsigned GCD = std::gcd(VTNumElts, WidenNumElts); + EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + ElementCount::getScalable(GCD)); + assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " + "down type's element count"); + SmallVector<SDValue> Parts; + unsigned i = 0; + for (; i < VTNumElts / GCD; ++i) + Parts.push_back( + DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, ReverseVal, + DAG.getVectorIdxConstant(IdxVal + i * GCD, dl))); + for (; i < WidenNumElts / GCD; ++i) + Parts.push_back(DAG.getUNDEF(PartVT)); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); + } + + // Use VECTOR_SHUFFLE to combine new vector from 'ReverseVal' for + // fixed-vectors. 
+ SmallVector<int, 16> Mask; + for (unsigned i = 0; i != VTNumElts; ++i) { + Mask.push_back(IdxVal + i); + } + for (unsigned i = VTNumElts; i != WidenNumElts; ++i) + Mask.push_back(-1); + + return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getUNDEF(WidenVT), + Mask); +} + SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && @@ -5432,6 +5789,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break; + case ISD::EXPERIMENTAL_VP_STRIDED_STORE: + Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo); + break; case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; @@ -5910,6 +6270,38 @@ SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) { ST->isCompressingStore()); } +SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N, + unsigned OpNo) { + assert((OpNo == 1 || OpNo == 4) && + "Can widen only data or mask operand of vp_strided_store"); + VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N); + SDValue Mask = SST->getMask(); + SDValue StVal = SST->getValue(); + SDLoc DL(N); + + if (OpNo == 1) + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided store"); + else + assert(getTypeAction(StVal.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen VP strided store"); + + StVal = GetWidenedVector(StVal); + Mask = GetWidenedVector(Mask); + + assert(StVal.getValueType().getVectorElementCount() == + Mask.getValueType().getVectorElementCount() && + "Data and mask vectors should have the same number of elements"); + + return DAG.getStridedStoreVP( + SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(), + SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(), + SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(), + SST->isCompressingStore()); +} + SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { assert((OpNo == 1 || OpNo == 3) && "Can widen only data or mask operand of mstore"); @@ -6127,7 +6519,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { unsigned WideElts = WideVT.getVectorMinNumElements(); if (WideVT.isScalableVector()) { - unsigned GCD = greatestCommonDivisor(OrigElts, WideElts); + unsigned GCD = std::gcd(OrigElts, WideElts); EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, ElementCount::getScalable(GCD)); SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem); @@ -6164,7 +6556,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) { unsigned WideElts = WideVT.getVectorMinNumElements(); if (WideVT.isScalableVector()) { - unsigned GCD = greatestCommonDivisor(OrigElts, WideElts); + unsigned GCD = std::gcd(OrigElts, WideElts); EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, ElementCount::getScalable(GCD)); SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem); @@ -6223,12 +6615,13 @@ SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) { // Align: If 0, don't allow use of a wider type // WidenEx: If Align is not 0, the amount additional we can load/store from. 
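The VECTOR_REVERSE widening above works because reversing the widened vector pushes the padding lanes to the front: the original VTNumElts elements end up starting at offset IdxVal = WidenNumElts - VTNumElts, which is exactly where the extract_subvector chunks (scalable case) or the shuffle indices (fixed case) begin. A small index-level check of that offset, using plain vectors and a sentinel value in place of the undef padding (not the DAG API):

#include <cassert>
#include <vector>

int main() {
  const int Undef = -1;
  std::vector<int> Orig = {10, 11, 12, 13, 14, 15}; // VTNumElts = 6
  std::vector<int> Widened = Orig;
  Widened.resize(8, Undef);                         // WidenNumElts = 8

  // Reverse the whole widened vector, as WidenVecRes_VECTOR_REVERSE does.
  std::vector<int> Rev(Widened.rbegin(), Widened.rend());

  // The reversed original data starts at IdxVal = WidenNumElts - VTNumElts.
  auto IdxVal = Widened.size() - Orig.size(); // 2
  std::vector<int> Result(Rev.begin() + IdxVal,
                          Rev.begin() + IdxVal + Orig.size());

  std::vector<int> Expected(Orig.rbegin(), Orig.rend()); // {15, ..., 10}
  assert(Result == Expected);
  return 0;
}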
-static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, - unsigned Width, EVT WidenVT, - unsigned Align = 0, unsigned WidenEx = 0) { +static std::optional<EVT> findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align = 0, + unsigned WidenEx = 0) { EVT WidenEltVT = WidenVT.getVectorElementType(); const bool Scalable = WidenVT.isScalableVector(); - unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinSize(); + unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinValue(); unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); unsigned AlignInBits = Align*8; @@ -6266,7 +6659,7 @@ static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, // Skip vector MVTs which don't match the scalable property of WidenVT. if (Scalable != MemVT.isScalableVector()) continue; - unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinSize(); + unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinValue(); auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); if ((Action == TargetLowering::TypeLegal || Action == TargetLowering::TypePromoteInteger) && @@ -6283,7 +6676,7 @@ static Optional<EVT> findMemType(SelectionDAG &DAG, const TargetLowering &TLI, // Using element-wise loads and stores for widening operations is not // supported for scalable vectors if (Scalable) - return None; + return std::nullopt; return RetVT; } @@ -6348,9 +6741,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value(); // Find the vector type that can load from. - Optional<EVT> FirstVT = - findMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign, - WidthDiff.getKnownMinSize()); + std::optional<EVT> FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign, + WidthDiff.getKnownMinValue()); if (!FirstVT) return SDValue(); @@ -6361,15 +6754,15 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, // Unless we're able to load in one instruction we must work out how to load // the remainder. if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) { - Optional<EVT> NewVT = FirstVT; + std::optional<EVT> NewVT = FirstVT; TypeSize RemainingWidth = LdWidth; TypeSize NewVTWidth = FirstVTWidth; do { RemainingWidth -= NewVTWidth; if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) { // The current type we are using is too large. Find a better size. - NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinSize(), WidenVT, - LdAlign, WidthDiff.getKnownMinSize()); + NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinValue(), + WidenVT, LdAlign, WidthDiff.getKnownMinValue()); if (!NewVT) return SDValue(); NewVTWidth = NewVT->getSizeInBits(); @@ -6387,7 +6780,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); if (!FirstVT->isVector()) { unsigned NumElts = - WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); + WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); @@ -6396,9 +6789,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, return LdOp; // TODO: We don't currently have any tests that exercise this code path. 
- assert(WidenWidth.getFixedSize() % FirstVTWidth.getFixedSize() == 0); + assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0); unsigned NumConcat = - WidenWidth.getFixedSize() / FirstVTWidth.getFixedSize(); + WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); SmallVector<SDValue, 16> ConcatOps(NumConcat); SDValue UndefVal = DAG.getUNDEF(*FirstVT); ConcatOps[0] = LdOp; @@ -6461,9 +6854,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, TypeSize LdTySize = LdTy.getSizeInBits(); TypeSize NewLdTySize = NewLdTy.getSizeInBits(); assert(NewLdTySize.isScalable() == LdTySize.isScalable() && - NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinSize())); + NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinValue())); unsigned NumOps = - NewLdTySize.getKnownMinSize() / LdTySize.getKnownMinSize(); + NewLdTySize.getKnownMinValue() / LdTySize.getKnownMinValue(); SmallVector<SDValue, 16> WidenOps(NumOps); unsigned j = 0; for (; j != End-Idx; ++j) @@ -6481,11 +6874,11 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, if (WidenWidth == LdTy.getSizeInBits() * (End - Idx)) return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, - makeArrayRef(&ConcatOps[Idx], End - Idx)); + ArrayRef(&ConcatOps[Idx], End - Idx)); // We need to fill the rest with undefs to build the vector. unsigned NumOps = - WidenWidth.getKnownMinSize() / LdTy.getSizeInBits().getKnownMinSize(); + WidenWidth.getKnownMinValue() / LdTy.getSizeInBits().getKnownMinValue(); SmallVector<SDValue, 16> WidenOps(NumOps); SDValue UndefVal = DAG.getUNDEF(LdTy); { @@ -6584,8 +6977,8 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, while (StWidth.isNonZero()) { // Find the largest vector type we can store with. - Optional<EVT> NewVT = - findMemType(DAG, TLI, StWidth.getKnownMinSize(), ValVT); + std::optional<EVT> NewVT = + findMemType(DAG, TLI, StWidth.getKnownMinValue(), ValVT); if (!NewVT) return false; MemVTs.push_back({*NewVT, 0}); @@ -6620,11 +7013,11 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, } while (--Count); } else { // Cast the vector to the scalar type we can store. - unsigned NumElts = ValWidth.getFixedSize() / NewVTWidth.getFixedSize(); + unsigned NumElts = ValWidth.getFixedValue() / NewVTWidth.getFixedValue(); EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); // Readjust index position based on new vector type. - Idx = Idx * ValEltWidth / NewVTWidth.getFixedSize(); + Idx = Idx * ValEltWidth / NewVTWidth.getFixedValue(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, DAG.getVectorIdxConstant(Idx++, dl)); @@ -6636,7 +7029,7 @@ bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr); } while (--Count); // Restore index back to be relative to the original widen element type. - Idx = Idx * NewVTWidth.getFixedSize() / ValEltWidth; + Idx = Idx * NewVTWidth.getFixedValue() / ValEltWidth; } } @@ -6685,7 +7078,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, unsigned InNumElts = InEC.getFixedValue(); unsigned WidenNumElts = WidenEC.getFixedValue(); - // Fall back to extract and build. + // Fall back to extract and build (+ mask, if padding with zeros). 
SmallVector<SDValue, 16> Ops(WidenNumElts); EVT EltVT = NVT.getVectorElementType(); unsigned MinNumElts = std::min(WidenNumElts, InNumElts); @@ -6694,9 +7087,21 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, DAG.getVectorIdxConstant(Idx, dl)); - SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : - DAG.getUNDEF(EltVT); - for ( ; Idx < WidenNumElts; ++Idx) - Ops[Idx] = FillVal; - return DAG.getBuildVector(NVT, dl, Ops); + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + + SDValue Widened = DAG.getBuildVector(NVT, dl, Ops); + if (!FillWithZeroes) + return Widened; + + assert(NVT.isInteger() && + "We expect to never want to FillWithZeroes for non-integral types."); + + SmallVector<SDValue, 16> MaskOps; + MaskOps.append(MinNumElts, DAG.getAllOnesConstant(dl, EltVT)); + MaskOps.append(WidenNumElts - MinNumElts, DAG.getConstant(0, dl, EltVT)); + + return DAG.getNode(ISD::AND, dl, NVT, Widened, + DAG.getBuildVector(NVT, dl, MaskOps)); } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 3ac2a7bddc5a..2d93adea6b9b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -426,10 +426,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, NumRes = 1; } else { const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + assert(!MCID.implicit_defs().empty() && + "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) + for (MCPhysReg ImpDef : MCID.implicit_defs()) { + if (Reg == ImpDef) break; ++NumRes; } @@ -526,11 +527,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, if (!Node->isMachineOpcode()) continue; const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); - if (!MCID.ImplicitDefs) - continue; - for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { - CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); - } + for (MCPhysReg Reg : MCID.implicit_defs()) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); } return !LRegs.empty(); } @@ -777,8 +775,7 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos, - DAG->getUseInstrRefDebugInfo()); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); DenseMap<SDValue, Register> VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8a04ce7535a1..c252046ef10b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -24,7 +24,7 @@ #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Register.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" @@ -302,6 +302,8 @@ private: } // end anonymous namespace +static constexpr unsigned RegSequenceCost = 1; + /// 
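The rewritten tail of ModifyToType above now pads with undef and, when zero padding is requested, clears the padded lanes afterwards by ANDing with a constant mask (all-ones for the original lanes, zero for the rest); the new assert reflects that this trick only applies to integer vectors. An integer-level sketch of that masking step with plain arrays and illustrative names:

#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>

int main() {
  // "Widened" value: 3 real lanes followed by one lane of junk (undef).
  std::array<uint32_t, 4> Widened = {7, 8, 9, 0xDEADBEEF};

  // Mask: all-ones for the MinNumElts original lanes, zero for the padding.
  std::array<uint32_t, 4> Mask = {~0u, ~0u, ~0u, 0u};

  for (std::size_t I = 0; I < Widened.size(); ++I)
    Widened[I] &= Mask[I];

  assert((Widened == std::array<uint32_t, 4>{7, 8, 9, 0}));
  return 0;
}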
GetCostForDef - Looks up the register class and cost for a given definition. /// Typically this just means looking up the representative register class, /// but for untyped values (MVT::Untyped) it means inspecting the node's @@ -321,7 +323,7 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, // Special handling for CopyFromReg of untyped values. if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); RegClass = RC->getID(); Cost = 1; @@ -333,13 +335,14 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); RegClass = RC->getID(); - Cost = 1; + Cost = RegSequenceCost; return; } unsigned Idx = RegDefPos.GetIdx(); - const MCInstrDesc Desc = TII->get(Opcode); + const MCInstrDesc &Desc = TII->get(Opcode); const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); + assert(RC && "Not a valid register class"); RegClass = RC->getID(); // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a // better way to determine it. @@ -1089,7 +1092,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { RemovePred(SU, Pred); AddPredQueued(NewSU, Pred); } - for (SDep D : NodeSuccs) { + for (SDep &D : NodeSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); @@ -1100,7 +1103,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) { !D.isCtrl() && NewSU->NumRegDefsLeft > 0) --NewSU->NumRegDefsLeft; } - for (SDep D : ChainSuccs) { + for (SDep &D : ChainSuccs) { SUnit *SuccDep = D.getSUnit(); D.setSUnit(SU); RemovePred(SuccDep, D); @@ -1204,11 +1207,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { D.setSUnit(NewSU); AddPredQueued(SuccSU, D); D.setSUnit(SU); - DelDeps.push_back(std::make_pair(SuccSU, D)); + DelDeps.emplace_back(SuccSU, D); } } - for (auto &DelDep : DelDeps) - RemovePred(DelDep.first, DelDep.second); + for (const auto &[DelSU, DelD] : DelDeps) + RemovePred(DelSU, DelD); AvailableQueue->updateNode(SU); AvailableQueue->addNode(NewSU); @@ -1242,17 +1245,17 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, SDep D = Succ; D.setSUnit(CopyToSU); AddPredQueued(SuccSU, D); - DelDeps.push_back(std::make_pair(SuccSU, Succ)); + DelDeps.emplace_back(SuccSU, Succ); } else { - // Avoid scheduling the def-side copy before other successors. Otherwise + // Avoid scheduling the def-side copy before other successors. Otherwise, // we could introduce another physreg interference on the copy and // continue inserting copies indefinitely. 
AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial)); } } - for (auto &DelDep : DelDeps) - RemovePred(DelDep.first, DelDep.second); + for (const auto &[DelSU, DelD] : DelDeps) + RemovePred(DelSU, DelD); SDep FromDep(SU, SDep::Data, Reg); FromDep.setLatency(SU->Latency); @@ -1281,10 +1284,11 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, NumRes = 1; } else { const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); - assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + assert(!MCID.implicit_defs().empty() && + "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { - if (Reg == *ImpDef) + for (MCPhysReg ImpDef : MCID.implicit_defs()) { + if (Reg == ImpDef) break; ++NumRes; } @@ -1381,8 +1385,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { InlineAsm::isClobberKind(Flags)) { // Check for def of register or earlyclobber register. for (; NumVals; --NumVals, ++i) { - unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); - if (Register::isPhysicalRegister(Reg)) + Register Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (Reg.isPhysical()) CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } else @@ -1419,7 +1423,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { } if (const uint32_t *RegMask = getNodeRegMask(Node)) CheckForLiveRegDefMasked(SU, RegMask, - makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), + ArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), RegAdded, LRegs); const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); @@ -1429,16 +1433,14 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { // of %noreg. When the OptionalDef is set to a valid register, we need to // handle it in the same way as an ImplicitDef. for (unsigned i = 0; i < MCID.getNumDefs(); ++i) - if (MCID.OpInfo[i].isOptionalDef()) { + if (MCID.operands()[i].isOptionalDef()) { const SDValue &OptionalDef = Node->getOperand(i - Node->getNumValues()); - unsigned Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); + Register Reg = cast<RegisterSDNode>(OptionalDef)->getReg(); CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } } - if (!MCID.ImplicitDefs) - continue; - for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) - CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + for (MCPhysReg Reg : MCID.implicit_defs()) + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } return !LRegs.empty(); @@ -1484,16 +1486,15 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (LRegs[0] == TRI->getNumRegs()) dbgs() << "CallResource"; else dbgs() << printReg(LRegs[0], TRI); dbgs() << " SU #" << CurSU->NodeNum << '\n'); - std::pair<LRegsMapT::iterator, bool> LRegsPair = - LRegsMap.insert(std::make_pair(CurSU, LRegs)); - if (LRegsPair.second) { + auto [LRegsIter, LRegsInserted] = LRegsMap.try_emplace(CurSU, LRegs); + if (LRegsInserted) { CurSU->isPending = true; // This SU is not in AvailableQueue right now. Interferences.push_back(CurSU); } else { assert(CurSU->isPending && "Interferences are pending"); // Update the interference with current live regs. 
- LRegsPair.first->second = LRegs; + LRegsIter->second = LRegs; } CurSU = AvailableQueue->pop(); } @@ -2302,6 +2303,16 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); continue; } + if (POpc == TargetOpcode::REG_SEQUENCE) { + unsigned DstRCIdx = + cast<ConstantSDNode>(PN->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + unsigned RCId = RC->getID(); + // REG_SEQUENCE is untyped, so getRepRegClassCostFor could not be used + // here. Instead use the same constant as in GetCostForDef. + RegPressure[RCId] += RegSequenceCost; + continue; + } unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { MVT VT = PN->getSimpleValueType(i); @@ -2376,9 +2387,9 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) { const SUnit *PredSU = Pred.getSUnit(); if (PredSU->getNode() && PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { - unsigned Reg = - cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) { + Register Reg = + cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); + if (Reg.isVirtual()) { RetVal = true; continue; } @@ -2397,9 +2408,9 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { if (Succ.isCtrl()) continue; const SUnit *SuccSU = Succ.getSUnit(); if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { - unsigned Reg = - cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); - if (Register::isVirtualRegister(Reg)) { + Register Reg = + cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); + if (Reg.isVirtual()) { RetVal = true; continue; } @@ -2854,10 +2865,10 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - const MCPhysReg *ImpDefs - = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); + ArrayRef<MCPhysReg> ImpDefs = + TII->get(SU->getNode()->getMachineOpcode()).implicit_defs(); const uint32_t *RegMask = getNodeRegMask(SU->getNode()); - if(!ImpDefs && !RegMask) + if (ImpDefs.empty() && !RegMask) return false; for (const SDep &Succ : SU->Succs) { @@ -2871,14 +2882,14 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) return true; - if (ImpDefs) - for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) - // Return true if SU clobbers this physical register use and the - // definition of the register reaches from DepSU. IsReachable queries - // a topological forward sort of the DAG (following the successors). - if (TRI->regsOverlap(*ImpDef, SuccPred.getReg()) && - scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) - return true; + for (MCPhysReg ImpDef : ImpDefs) { + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). 
+ if (TRI->regsOverlap(ImpDef, SuccPred.getReg()) && + scheduleDAG->IsReachable(DepSU, SuccPred.getSUnit())) + return true; + } } } return false; @@ -2891,16 +2902,16 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); - assert(ImpDefs && "Caller should check hasPhysRegDefs"); + ArrayRef<MCPhysReg> ImpDefs = TII->get(N->getMachineOpcode()).implicit_defs(); + assert(!ImpDefs.empty() && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const MCPhysReg *SUImpDefs = - TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); + ArrayRef<MCPhysReg> SUImpDefs = + TII->get(SUNode->getMachineOpcode()).implicit_defs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); - if (!SUImpDefs && !SURegMask) + if (SUImpDefs.empty() && !SURegMask) continue; for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { MVT VT = N->getSimpleValueType(i); @@ -2908,13 +2919,10 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, continue; if (!N->hasAnyUseOfValue(i)) continue; - unsigned Reg = ImpDefs[i - NumDefs]; + MCPhysReg Reg = ImpDefs[i - NumDefs]; if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) return true; - if (!SUImpDefs) - continue; - for (;*SUImpDefs; ++SUImpDefs) { - unsigned SUReg = *SUImpDefs; + for (MCPhysReg SUReg : SUImpDefs) { if (TRI->regsOverlap(Reg, SUReg)) return true; } @@ -2968,8 +2976,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyToReg && - Register::isVirtualRegister( - cast<RegisterSDNode>(N->getOperand(1))->getReg())) + cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual()) continue; SDNode *PredFrameSetup = nullptr; @@ -3015,8 +3022,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { // like other nodes from the perspective of scheduling heuristics. if (SDNode *N = SU.getNode()) if (N->getOpcode() == ISD::CopyFromReg && - Register::isVirtualRegister( - cast<RegisterSDNode>(N->getOperand(1))->getReg())) + cast<RegisterSDNode>(N->getOperand(1))->getReg().isVirtual()) continue; // Perform checks on the successors of PredSU. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 5166db033c62..2e1fd1e8a758 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -110,11 +110,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, + const TargetLowering &TLI, unsigned &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TLI.checkForPhysRegDependency(Def, User, Op, TRI, TII, PhysReg, Cost)) + return; + if (Register::isVirtualRegister(Reg)) return; @@ -188,7 +192,7 @@ static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { "expected an unused glue value"); CloneNodeWithValues(N, DAG, - makeArrayRef(N->value_begin(), N->getNumValues() - 1)); + ArrayRef(N->value_begin(), N->getNumValues() - 1)); } /// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. @@ -460,7 +464,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // Find all predecessors and successors of the group. for (SDNode *N = SU.getNode(); N; N = N->getGluedNode()) { if (N->isMachineOpcode() && - TII->get(N->getMachineOpcode()).getImplicitDefs()) { + !TII->get(N->getMachineOpcode()).implicit_defs().empty()) { SU.hasPhysRegClobbers = true; unsigned NumUsed = InstrEmitter::CountResults(N); while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) @@ -485,7 +489,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { unsigned PhysReg = 0; int Cost = 1; // Determine if this is a physical register dependency. - CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + CheckForPhysRegDependency(OpN, N, i, TRI, TII, TLI, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler @@ -843,8 +848,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, Register> &VRBaseMap, /// not necessarily refer to returned BB. The emitter may split blocks. 
MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos, - DAG->getUseInstrRefDebugInfo()); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); DenseMap<SDValue, Register> VRBaseMap; DenseMap<SUnit*, Register> CopyVRBaseMap; SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; @@ -890,6 +894,9 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { MI->setFlag(MachineInstr::MIFlag::NoMerge); } + if (MDNode *MD = DAG->getPCSections(Node)) + MI->setPCSections(MF, MD); + return MI; }; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 195c0e6a836f..9a3609bc183b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -18,7 +18,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/None.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -27,6 +26,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -45,6 +45,7 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -92,6 +93,7 @@ void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {} void SelectionDAG::DAGNodeDeletedListener::anchor() {} +void SelectionDAG::DAGNodeInsertedListener::anchor() {} #define DEBUG_TYPE "selectiondag" @@ -291,6 +293,43 @@ bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { return true; } +bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize, + bool Signed) { + assert(N->getValueType(0).isVector() && "Expected a vector!"); + + unsigned EltSize = N->getValueType(0).getScalarSizeInBits(); + if (EltSize <= NewEltSize) + return false; + + if (N->getOpcode() == ISD::ZERO_EXTEND) { + return (N->getOperand(0).getValueType().getScalarSizeInBits() <= + NewEltSize) && + !Signed; + } + if (N->getOpcode() == ISD::SIGN_EXTEND) { + return (N->getOperand(0).getValueType().getScalarSizeInBits() <= + NewEltSize) && + Signed; + } + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (const SDValue &Op : N->op_values()) { + if (Op.isUndef()) + continue; + if (!isa<ConstantSDNode>(Op)) + return false; + + APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().trunc(EltSize); + if (Signed && C.trunc(NewEltSize).sext(EltSize) != C) + return false; + if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C) + return false; + } + + return true; +} + bool ISD::allOperandsUndef(const SDNode *N) { // Return false if the node has no operands. 
// This is "logically inconsistent" with the definition of "all" but @@ -300,6 +339,10 @@ bool ISD::allOperandsUndef(const SDNode *N) { return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); }); } +bool ISD::isFreezeUndef(const SDNode *N) { + return N->getOpcode() == ISD::FREEZE && N->getOperand(0).isUndef(); +} + bool ISD::matchUnaryPredicate(SDValue Op, std::function<bool(ConstantSDNode *)> Match, bool AllowUndefs) { @@ -450,10 +493,10 @@ bool ISD::isVPReduction(unsigned Opcode) { } /// The operand position of the vector mask. -Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { +std::optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { switch (Opcode) { default: - return None; + return std::nullopt; #define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, ...) \ case ISD::VPSD: \ return MASKPOS; @@ -462,10 +505,10 @@ Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) { } /// The operand position of the explicit vector length parameter. -Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { +std::optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) { switch (Opcode) { default: - return None; + return std::nullopt; #define BEGIN_REGISTER_VP_SDNODE(VPSD, LEGALPOS, TDNAME, MASKPOS, EVLPOS) \ case ISD::VPSD: \ return EVLPOS; @@ -618,7 +661,7 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } } -static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, +static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned OpC, SDVTList VTList, ArrayRef<SDValue> OpList) { AddNodeIDOpcode(ID, OpC); AddNodeIDValueTypes(ID, VTList); @@ -1018,6 +1061,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) { // If any of the SDDbgValue nodes refer to this SDNode, invalidate // them and forget about that node. DbgInfo->erase(N); + + // Invalidate extra info. + SDEI.erase(N); } #ifndef NDEBUG @@ -1230,18 +1276,18 @@ Align SelectionDAG::getEVTAlign(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), OptLevel(OL), - EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other, MVT::Glue)), Root(getEntryNode()) { InsertNode(&EntryNode); DbgInfo = new SDDbgInfo(); } void SelectionDAG::init(MachineFunction &NewMF, - OptimizationRemarkEmitter &NewORE, - Pass *PassPtr, const TargetLibraryInfo *LibraryInfo, - LegacyDivergenceAnalysis * Divergence, - ProfileSummaryInfo *PSIin, - BlockFrequencyInfo *BFIin) { + OptimizationRemarkEmitter &NewORE, Pass *PassPtr, + const TargetLibraryInfo *LibraryInfo, + LegacyDivergenceAnalysis *Divergence, + ProfileSummaryInfo *PSIin, BlockFrequencyInfo *BFIin, + FunctionVarLocs const *VarLocs) { MF = &NewMF; SDAGISelPass = PassPtr; ORE = &NewORE; @@ -1252,6 +1298,7 @@ void SelectionDAG::init(MachineFunction &NewMF, DA = Divergence; PSI = PSIin; BFI = BFIin; + FnVarLocs = VarLocs; } SelectionDAG::~SelectionDAG() { @@ -1326,7 +1373,7 @@ void SelectionDAG::clear() { ExternalSymbols.clear(); TargetExternalSymbols.clear(); MCSymbols.clear(); - SDCallSiteDbgInfo.clear(); + SDEI.clear(); std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), static_cast<CondCodeSDNode*>(nullptr)); std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), @@ -1341,7 +1388,8 @@ void SelectionDAG::clear() { SDValue SelectionDAG::getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? 
getNode(ISD::FP_EXTEND, DL, VT, Op) - : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL)); + : getNode(ISD::FP_ROUND, DL, VT, Op, + getIntPtrConstant(0, DL, /*isTarget=*/true)); } std::pair<SDValue, SDValue> @@ -1415,6 +1463,10 @@ SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) { return getZeroExtendInReg(Op, DL, VT); } +SDValue SelectionDAG::getNegative(SDValue Val, const SDLoc &DL, EVT VT) { + return getNode(ISD::SUB, DL, VT, getConstant(0, DL, VT), Val); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) { return getNode(ISD::XOR, DL, VT, Val, getAllOnesConstant(DL, VT)); @@ -1431,6 +1483,20 @@ SDValue SelectionDAG::getVPLogicalNOT(const SDLoc &DL, SDValue Val, return getNode(ISD::VP_XOR, DL, VT, Val, TrueValue, Mask, EVL); } +SDValue SelectionDAG::getVPPtrExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, + SDValue Mask, SDValue EVL) { + return getVPZExtOrTrunc(DL, VT, Op, Mask, EVL); +} + +SDValue SelectionDAG::getVPZExtOrTrunc(const SDLoc &DL, EVT VT, SDValue Op, + SDValue Mask, SDValue EVL) { + if (VT.bitsGT(Op.getValueType())) + return getNode(ISD::VP_ZERO_EXTEND, DL, VT, Op, Mask, EVL); + if (VT.bitsLT(Op.getValueType())) + return getNode(ISD::VP_TRUNCATE, DL, VT, Op, Mask, EVL); + return Op; +} + SDValue SelectionDAG::getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT) { if (!V) @@ -1544,7 +1610,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, "APInt size does not match type size!"); unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt); ID.AddPointer(Elt); ID.AddBoolean(isO); void *IP = nullptr; @@ -1561,11 +1627,8 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isScalableVector()) - Result = getSplatVector(VT, DL, Result); - else if (VT.isVector()) - Result = getSplatBuildVector(VT, DL, Result); - + if (VT.isVector()) + Result = getSplat(VT, DL, Result); return Result; } @@ -1602,7 +1665,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, // we don't have issues with SNANs. unsigned Opc = isTarget ? ISD::TargetConstantFP : ISD::ConstantFP; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + AddNodeIDNode(ID, Opc, getVTList(EltVT), std::nullopt); ID.AddPointer(&V); void *IP = nullptr; SDNode *N = nullptr; @@ -1617,10 +1680,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP &V, const SDLoc &DL, } SDValue Result(N, 0); - if (VT.isScalableVector()) - Result = getSplatVector(VT, DL, Result); - else if (VT.isVector()) - Result = getSplatBuildVector(VT, DL, Result); + if (VT.isVector()) + Result = getSplat(VT, DL, Result); NewSDValueDbgMsg(Result, "Creating fp constant: ", this); return Result; } @@ -1661,7 +1722,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddPointer(GV); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -1679,7 +1740,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { unsigned Opc = isTarget ? 
ISD::TargetFrameIndex : ISD::FrameIndex; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(FI); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -1697,7 +1758,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, "Cannot set target flags on target-independent jump tables"); unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(JTI); ID.AddInteger(TargetFlags); void *IP = nullptr; @@ -1721,7 +1782,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, : getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); ID.AddPointer(C); @@ -1748,7 +1809,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, Alignment = getDataLayout().getPrefTypeAlign(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddInteger(Alignment->value()); ID.AddInteger(Offset); C->addSelectionDAGCSEId(ID); @@ -1767,7 +1828,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, unsigned TargetFlags) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), std::nullopt); ID.AddInteger(Index); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -1783,7 +1844,7 @@ SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), std::nullopt); ID.AddPointer(MBB); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -1894,7 +1955,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, "Index out of range"); // Copy the mask so we can do any needed cleanup. 
- SmallVector<int, 8> MaskVec(Mask.begin(), Mask.end()); + SmallVector<int, 8> MaskVec(Mask); // Canonicalize shuffle v, v -> v, undef if (N1 == N2) { @@ -2050,7 +2111,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { EVT VT = SV.getValueType(0); - SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + SmallVector<int, 8> MaskVec(SV.getMask()); ShuffleVectorSDNode::commuteMask(MaskVec); SDValue Op0 = SV.getOperand(0); @@ -2060,7 +2121,7 @@ SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); + AddNodeIDNode(ID, ISD::Register, getVTList(VT), std::nullopt); ID.AddInteger(RegNo); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -2075,7 +2136,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), std::nullopt); ID.AddPointer(RegMask); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, IP)) @@ -2117,7 +2178,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(VT), None); + AddNodeIDNode(ID, Opc, getVTList(VT), std::nullopt); ID.AddPointer(BA); ID.AddInteger(Offset); ID.AddInteger(TargetFlags); @@ -2133,7 +2194,7 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, SDValue SelectionDAG::getSrcValue(const Value *V) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), std::nullopt); ID.AddPointer(V); void *IP = nullptr; @@ -2148,7 +2209,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { SDValue SelectionDAG::getMDNode(const MDNode *MD) { FoldingSetNodeID ID; - AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), std::nullopt); ID.AddPointer(MD); void *IP = nullptr; @@ -2287,7 +2348,7 @@ SDValue SelectionDAG::CreateStackTemporary(TypeSize Bytes, Align Alignment) { StackID = TFI->getStackIDForScalableVectors(); // The stack id gives an indication of whether the object is scalable or // not, so it's safe to pass in the minimum size here. - int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinSize(), Alignment, + int FrameIdx = MFI.CreateStackObject(Bytes.getKnownMinValue(), Alignment, false, nullptr, StackID); return getFrameIndex(FrameIdx, TLI->getFrameIndexTy(getDataLayout())); } @@ -2305,8 +2366,9 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { assert(VT1Size.isScalable() == VT2Size.isScalable() && "Don't know how to choose the maximum size when creating a stack " "temporary"); - TypeSize Bytes = - VT1Size.getKnownMinSize() > VT2Size.getKnownMinSize() ? VT1Size : VT2Size; + TypeSize Bytes = VT1Size.getKnownMinValue() > VT2Size.getKnownMinValue() + ? 
VT1Size + : VT2Size; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); @@ -2380,34 +2442,34 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, default: break; case ISD::SETEQ: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT, OpVT); case ISD::SETNE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan || R==APFloat::cmpLessThan, dl, VT, OpVT); case ISD::SETLT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT, OpVT); case ISD::SETGT: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl, VT, OpVT); case ISD::SETLE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan || R==APFloat::cmpEqual, dl, VT, OpVT); case ISD::SETGE: if (R==APFloat::cmpUnordered) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan || R==APFloat::cmpEqual, dl, VT, OpVT); case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT, @@ -2459,48 +2521,6 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2, return SDValue(); } -/// See if the specified operand can be simplified with the knowledge that only -/// the bits specified by DemandedBits are used. -/// TODO: really we should be making this into the DAG equivalent of -/// SimplifyMultipleUseDemandedBits and not generate any new nodes. -SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) { - EVT VT = V.getValueType(); - - if (VT.isScalableVector()) - return SDValue(); - - switch (V.getOpcode()) { - default: - return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, *this); - case ISD::Constant: { - const APInt &CVal = cast<ConstantSDNode>(V)->getAPIntValue(); - APInt NewVal = CVal & DemandedBits; - if (NewVal != CVal) - return getConstant(NewVal, SDLoc(V), V.getValueType()); - break; - } - case ISD::SRL: - // Only look at single-use SRLs. - if (!V.getNode()->hasOneUse()) - break; - if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { - // See if we can recursively simplify the LHS. - unsigned Amt = RHSC->getZExtValue(); - - // Watch out for shift count overflow though. - if (Amt >= DemandedBits.getBitWidth()) - break; - APInt SrcDemandedBits = DemandedBits << Amt; - if (SDValue SimplifyLHS = TLI->SimplifyMultipleUseDemandedBits( - V.getOperand(0), SrcDemandedBits, *this)) - return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, - V.getOperand(1)); - } - break; - } - return SDValue(); -} - /// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We /// use this predicate to simplify operations downstream. 
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { @@ -2538,17 +2558,40 @@ bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask, return Mask.isSubsetOf(computeKnownBits(V, Depth).One); } +APInt SelectionDAG::computeVectorKnownZeroElements(SDValue Op, + const APInt &DemandedElts, + unsigned Depth) const { + EVT VT = Op.getValueType(); + assert(VT.isVector() && !VT.isScalableVector() && "Only for fixed vectors!"); + + unsigned NumElts = VT.getVectorNumElements(); + assert(DemandedElts.getBitWidth() == NumElts && "Unexpected demanded mask."); + + APInt KnownZeroElements = APInt::getNullValue(NumElts); + for (unsigned EltIdx = 0; EltIdx != NumElts; ++EltIdx) { + if (!DemandedElts[EltIdx]) + continue; // Don't query elements that are not demanded. + APInt Mask = APInt::getOneBitSet(NumElts, EltIdx); + if (MaskedVectorIsZero(Op, Mask, Depth)) + KnownZeroElements.setBit(EltIdx); + } + return KnownZeroElements; +} + /// isSplatValue - Return true if the vector V has the same value -/// across all DemandedElts. For scalable vectors it does not make -/// sense to specify which elements are demanded or undefined, therefore -/// they are simply ignored. +/// across all DemandedElts. For scalable vectors, we don't know the +/// number of lanes at compile time. Instead, we use a 1 bit APInt +/// to represent a conservative value for all lanes; that is, that +/// one bit value is implicitly splatted across all lanes. bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth) const { unsigned Opcode = V.getOpcode(); EVT VT = V.getValueType(); assert(VT.isVector() && "Vector type expected"); + assert((!VT.isScalableVector() || DemandedElts.getBitWidth() == 1) && + "scalable demanded bits are ignored"); - if (!VT.isScalableVector() && !DemandedElts) + if (!DemandedElts) return false; // No demanded elts, better to assume we don't know anything. if (Depth >= MaxRecursionDepth) @@ -2585,7 +2628,8 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts, default: if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) - return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, Depth); + return TLI->isSplatValueForTargetNode(V, DemandedElts, UndefElts, *this, + Depth); break; } @@ -2730,11 +2774,11 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) const { assert(VT.isVector() && "Vector type expected"); APInt UndefElts; - APInt DemandedElts; - - // For now we don't support this with scalable vectors. - if (!VT.isScalableVector()) - DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts + = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements()); return isSplatValue(V, DemandedElts, UndefElts) && (AllowUndefs || !UndefElts); } @@ -2747,10 +2791,11 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { switch (Opcode) { default: { APInt UndefElts; - APInt DemandedElts; - - if (!VT.isScalableVector()) - DemandedElts = APInt::getAllOnes(VT.getVectorNumElements()); + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. 
This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts + = APInt::getAllOnes(VT.isScalableVector() ? 1 : VT.getVectorNumElements()); if (isSplatValue(V, DemandedElts, UndefElts)) { if (VT.isScalableVector()) { @@ -2773,9 +2818,7 @@ SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) { SplatIdx = 0; return V; case ISD::VECTOR_SHUFFLE: { - if (VT.isScalableVector()) - return SDValue(); - + assert(!VT.isScalableVector()); // Check if this is a shuffle node doing a splat. // TODO - remove this and rely purely on SelectionDAG::isSplatValue, // getTargetVShiftNode currently struggles without the splat source. @@ -2890,14 +2933,10 @@ const APInt *SelectionDAG::getValidMaximumShiftAmountConstant( KnownBits SelectionDAG::computeKnownBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); - // TOOD: Until we have a plan for how to represent demanded elements for - // scalable vectors, we can just bail out for now. - if (Op.getValueType().isScalableVector()) { - unsigned BitWidth = Op.getScalarValueSizeInBits(); - return KnownBits(BitWidth); - } - - APInt DemandedElts = VT.isVector() + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return computeKnownBits(Op, DemandedElts, Depth); @@ -2912,11 +2951,6 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, KnownBits Known(BitWidth); // Don't know anything. - // TOOD: Until we have a plan for how to represent demanded elements for - // scalable vectors, we can just bail out for now. - if (Op.getValueType().isScalableVector()) - return Known; - if (auto *C = dyn_cast<ConstantSDNode>(Op)) { // We know all of the bits for a constant! return KnownBits::makeConstant(C->getAPIntValue()); @@ -2931,7 +2965,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, KnownBits Known2; unsigned NumElts = DemandedElts.getBitWidth(); - assert((!Op.getValueType().isVector() || + assert((!Op.getValueType().isFixedLengthVector() || NumElts == Op.getValueType().getVectorNumElements()) && "Unexpected vector size"); @@ -2943,7 +2977,17 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::MERGE_VALUES: return computeKnownBits(Op.getOperand(Op.getResNo()), DemandedElts, Depth + 1); + case ISD::SPLAT_VECTOR: { + SDValue SrcOp = Op.getOperand(0); + assert(SrcOp.getValueSizeInBits() >= BitWidth && + "Expected SPLAT_VECTOR implicit truncation"); + // Implicitly truncate the bits to match the official semantics of + // SPLAT_VECTOR. + Known = computeKnownBits(SrcOp, Depth + 1).trunc(BitWidth); + break; + } case ISD::BUILD_VECTOR: + assert(!Op.getValueType().isScalableVector()); // Collect the known bits that are shared by every demanded vector element. Known.Zero.setAllBits(); Known.One.setAllBits(); for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { @@ -2969,32 +3013,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, } break; case ISD::VECTOR_SHUFFLE: { + assert(!Op.getValueType().isScalableVector()); // Collect the known bits that are shared by every vector element referenced // by the shuffle. 
- APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); - Known.Zero.setAllBits(); Known.One.setAllBits(); + APInt DemandedLHS, DemandedRHS; const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); - for (unsigned i = 0; i != NumElts; ++i) { - if (!DemandedElts[i]) - continue; - - int M = SVN->getMaskElt(i); - if (M < 0) { - // For UNDEF elements, we don't know anything about the common state of - // the shuffle result. - Known.resetAll(); - DemandedLHS.clearAllBits(); - DemandedRHS.clearAllBits(); - break; - } + if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts, + DemandedLHS, DemandedRHS)) + break; - if ((unsigned)M < NumElts) - DemandedLHS.setBit((unsigned)M % NumElts); - else - DemandedRHS.setBit((unsigned)M % NumElts); - } // Known bits are the values that are shared by every demanded element. + Known.Zero.setAllBits(); Known.One.setAllBits(); if (!!DemandedLHS) { SDValue LHS = Op.getOperand(0); Known2 = computeKnownBits(LHS, DemandedLHS, Depth + 1); @@ -3011,6 +3041,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::CONCAT_VECTORS: { + if (Op.getValueType().isScalableVector()) + break; // Split DemandedElts and test each of the demanded subvectors. Known.Zero.setAllBits(); Known.One.setAllBits(); EVT SubVectorVT = Op.getOperand(0).getValueType(); @@ -3031,6 +3063,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::INSERT_SUBVECTOR: { + if (Op.getValueType().isScalableVector()) + break; // Demand any elements from the subvector and the remainder from the src its // inserted into. SDValue Src = Op.getOperand(0); @@ -3058,7 +3092,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, // Offset the demanded elts by the subvector index. SDValue Src = Op.getOperand(0); // Bail until we can represent demanded elements for scalable vectors. - if (Src.getValueType().isScalableVector()) + if (Op.getValueType().isScalableVector() || Src.getValueType().isScalableVector()) break; uint64_t Idx = Op.getConstantOperandVal(1); unsigned NumSrcElts = Src.getValueType().getVectorNumElements(); @@ -3067,6 +3101,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::SCALAR_TO_VECTOR: { + if (Op.getValueType().isScalableVector()) + break; // We know about scalar_to_vector as much as we know about it source, // which becomes the first element of otherwise unknown vector. if (DemandedElts != 1) @@ -3080,6 +3116,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::BITCAST: { + if (Op.getValueType().isScalableVector()) + break; + SDValue N0 = Op.getOperand(0); EVT SubVT = N0.getValueType(); unsigned SubBitWidth = SubVT.getScalarSizeInBits(); @@ -3335,13 +3374,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, assert((Op.getResNo() == 0 || Op.getResNo() == 1) && "Unknown result"); // Collect lo/hi source values and concatenate. - // TODO: Would a KnownBits::concatBits helper be useful? 
unsigned LoBits = Op.getOperand(0).getScalarValueSizeInBits(); unsigned HiBits = Op.getOperand(1).getScalarValueSizeInBits(); Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = Known.anyext(LoBits + HiBits); - Known.insertBits(Known2, LoBits); + Known = Known2.concat(Known); // Collect shift amount. Known2 = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1); @@ -3372,7 +3409,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we have a known 1, its position is our upper bound. unsigned PossibleTZ = Known2.countMaxTrailingZeros(); - unsigned LowBits = Log2_32(PossibleTZ) + 1; + unsigned LowBits = llvm::bit_width(PossibleTZ); Known.Zero.setBitsFrom(LowBits); break; } @@ -3381,7 +3418,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we have a known 1, its position is our upper bound. unsigned PossibleLZ = Known2.countMaxLeadingZeros(); - unsigned LowBits = Log2_32(PossibleLZ) + 1; + unsigned LowBits = llvm::bit_width(PossibleLZ); Known.Zero.setBitsFrom(LowBits); break; } @@ -3389,7 +3426,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); // If we know some of the bits are zero, they can't be one. unsigned PossibleOnes = Known2.countMaxPopulation(); - Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1); + Known.Zero.setBitsFrom(llvm::bit_width(PossibleOnes)); break; } case ISD::PARITY: { @@ -3403,7 +3440,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, if (ISD::isNON_EXTLoad(LD) && Cst) { // Determine any common known bits from the loaded constant pool value. Type *CstTy = Cst->getType(); - if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) { + if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits() && + !Op.getValueType().isScalableVector()) { // If its a vector splat, then we can (quickly) reuse the scalar path. // NOTE: We assume all elements match and none are UNDEF. if (CstTy->isVectorTy()) { @@ -3453,12 +3491,32 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, unsigned MemBits = VT.getScalarSizeInBits(); Known.Zero.setBitsFrom(MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - if (LD->getExtensionType() == ISD::NON_EXTLOAD) - computeKnownBitsFromRangeMetadata(*Ranges, Known); + EVT VT = LD->getValueType(0); + + // TODO: Handle for extending loads + if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + if (VT.isVector()) { + // Handle truncation to the first demanded element. + // TODO: Figure out which demanded elements are covered + if (DemandedElts != 1 || !getDataLayout().isLittleEndian()) + break; + + // Handle the case where a load has a vector type, but scalar memory + // with an attached range. 
+ EVT MemVT = LD->getMemoryVT(); + KnownBits KnownFull(MemVT.getSizeInBits()); + + computeKnownBitsFromRangeMetadata(*Ranges, KnownFull); + Known = KnownFull.trunc(BitWidth); + } else + computeKnownBitsFromRangeMetadata(*Ranges, Known); + } } break; } case ISD::ZERO_EXTEND_VECTOR_INREG: { + if (Op.getValueType().isScalableVector()) + break; EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); @@ -3471,6 +3529,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::SIGN_EXTEND_VECTOR_INREG: { + if (Op.getValueType().isScalableVector()) + break; EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); @@ -3487,6 +3547,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::ANY_EXTEND_VECTOR_INREG: { + if (Op.getValueType().isScalableVector()) + break; EVT InVT = Op.getOperand(0).getValueType(); APInt InDemandedElts = DemandedElts.zext(InVT.getVectorNumElements()); Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1); @@ -3506,7 +3568,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, case ISD::AssertZext: { EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); - Known = computeKnownBits(Op.getOperand(0), Depth+1); + Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known.Zero |= (~InMask); Known.One &= (~Known.Zero); break; @@ -3538,7 +3600,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SUB: case ISD::SUBC: { assert(Op.getResNo() == 0 && @@ -3566,7 +3628,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ADD: case ISD::ADDC: case ISD::ADDE: { @@ -3652,6 +3714,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::INSERT_VECTOR_ELT: { + if (Op.getValueType().isScalableVector()) + break; + // If we know the element index, split the demand between the // source vector and the inserted element, otherwise assume we need // the original demanded vector elements and the value. @@ -3781,7 +3846,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known.Zero.setBitsFrom(1); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ATOMIC_CMP_SWAP: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: @@ -3814,10 +3879,15 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, default: if (Opcode < ISD::BUILTIN_OP_END) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: + // TODO: Probably okay to remove after audit; here to reduce change size + // in initial enablement patch for scalable vectors + if (Op.getValueType().isScalableVector()) + break; + // Allow the target to implement this method for its nodes. 
TLI->computeKnownBitsForTargetNode(Op, Known, DemandedElts, *this, Depth); break; @@ -3914,11 +3984,10 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const { unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const { EVT VT = Op.getValueType(); - // TODO: Assume we don't know anything for now. - if (VT.isScalableVector()) - return 1; - - APInt DemandedElts = VT.isVector() + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return ComputeNumSignBits(Op, DemandedElts, Depth); @@ -3941,7 +4010,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, if (Depth >= MaxRecursionDepth) return 1; // Limit search depth. - if (!DemandedElts || VT.isScalableVector()) + if (!DemandedElts) return 1; // No demanded elts, better to assume we don't know anything. unsigned Opcode = Op.getOpcode(); @@ -3956,7 +4025,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::MERGE_VALUES: return ComputeNumSignBits(Op.getOperand(Op.getResNo()), DemandedElts, Depth + 1); + case ISD::SPLAT_VECTOR: { + // Check if the sign bits of source go down as far as the truncated value. + unsigned NumSrcBits = Op.getOperand(0).getValueSizeInBits(); + unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (NumSrcSignBits > (NumSrcBits - VTBits)) + return NumSrcSignBits - (NumSrcBits - VTBits); + break; + } case ISD::BUILD_VECTOR: + assert(!VT.isScalableVector()); Tmp = VTBits; for (unsigned i = 0, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i) { if (!DemandedElts[i]) @@ -3979,22 +4057,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, case ISD::VECTOR_SHUFFLE: { // Collect the minimum number of sign bits that are shared by every vector // element referenced by the shuffle. - APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0); + APInt DemandedLHS, DemandedRHS; const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); assert(NumElts == SVN->getMask().size() && "Unexpected vector size"); - for (unsigned i = 0; i != NumElts; ++i) { - int M = SVN->getMaskElt(i); - if (!DemandedElts[i]) - continue; - // For UNDEF elements, we don't know anything about the common state of - // the shuffle result. 
- if (M < 0) - return 1; - if ((unsigned)M < NumElts) - DemandedLHS.setBit((unsigned)M % NumElts); - else - DemandedRHS.setBit((unsigned)M % NumElts); - } + if (!getShuffleDemandedElts(NumElts, SVN->getMask(), DemandedElts, + DemandedLHS, DemandedRHS)) + return 1; + Tmp = std::numeric_limits<unsigned>::max(); if (!!DemandedLHS) Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); @@ -4010,6 +4079,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, } case ISD::BITCAST: { + if (VT.isScalableVector()) + break; SDValue N0 = Op.getOperand(0); EVT SrcVT = N0.getValueType(); unsigned SrcBits = SrcVT.getScalarSizeInBits(); @@ -4067,6 +4138,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Tmp2 = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1); return std::max(Tmp, Tmp2); case ISD::SIGN_EXTEND_VECTOR_INREG: { + if (VT.isScalableVector()) + break; SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); APInt DemandedSrcElts = DemandedElts.zext(SrcVT.getVectorNumElements()); @@ -4284,6 +4357,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, break; } case ISD::EXTRACT_ELEMENT: { + if (VT.isScalableVector()) + break; const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); const int BitWidth = Op.getValueSizeInBits(); const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth; @@ -4294,9 +4369,11 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // If the sign portion ends in our element the subtraction gives correct // result. Otherwise it gives either negative or > bitwidth result - return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); + return std::clamp(KnownSign - rIndex * BitWidth, 0, BitWidth); } case ISD::INSERT_VECTOR_ELT: { + if (VT.isScalableVector()) + break; // If we know the element index, split the demand between the // source vector and the inserted element, otherwise assume we need // the original demanded vector elements and the value. @@ -4327,6 +4404,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } case ISD::EXTRACT_VECTOR_ELT: { + assert(!VT.isScalableVector()); SDValue InVec = Op.getOperand(0); SDValue EltNo = Op.getOperand(1); EVT VecVT = InVec.getValueType(); @@ -4365,6 +4443,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return ComputeNumSignBits(Src, DemandedSrcElts, Depth + 1); } case ISD::CONCAT_VECTORS: { + if (VT.isScalableVector()) + break; // Determine the minimum number of sign bits across all demanded // elts of the input vectors. Early out if the result is already 1. Tmp = std::numeric_limits<unsigned>::max(); @@ -4383,6 +4463,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, return Tmp; } case ISD::INSERT_SUBVECTOR: { + if (VT.isScalableVector()) + break; // Demand any elements from the subvector and the remainder from the src its // inserted into. 
SDValue Src = Op.getOperand(0); @@ -4406,6 +4488,34 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, assert(Tmp <= VTBits && "Failed to determine minimum sign bits"); return Tmp; } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + if (const MDNode *Ranges = LD->getRanges()) { + if (DemandedElts != 1) + break; + + ConstantRange CR = getConstantRangeFromMetadata(*Ranges); + if (VTBits > CR.getBitWidth()) { + switch (LD->getExtensionType()) { + case ISD::SEXTLOAD: + CR = CR.signExtend(VTBits); + break; + case ISD::ZEXTLOAD: + CR = CR.zeroExtend(VTBits); + break; + default: + break; + } + } + + if (VTBits != CR.getBitWidth()) + break; + return std::min(CR.getSignedMin().getNumSignBits(), + CR.getSignedMax().getNumSignBits()); + } + + break; + } case ISD::ATOMIC_CMP_SWAP: case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: @@ -4453,7 +4563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, // We only need to handle vectors - computeKnownBits should handle // scalar cases. Type *CstTy = Cst->getType(); - if (CstTy->isVectorTy() && + if (CstTy->isVectorTy() && !VT.isScalableVector() && (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() && VTBits == CstTy->getScalarSizeInBits()) { Tmp = VTBits; @@ -4488,10 +4598,14 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts, Opcode == ISD::INTRINSIC_WO_CHAIN || Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) { - unsigned NumBits = + // TODO: This can probably be removed once target code is audited. This + // is here purely to reduce patch size and review complexity. + if (!VT.isScalableVector()) { + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, DemandedElts, *this, Depth); - if (NumBits > 1) - FirstAnswer = std::max(FirstAnswer, NumBits); + if (NumBits > 1) + FirstAnswer = std::max(FirstAnswer, NumBits); + } } // Finally, if we can prove that the top bits of the result are 0's or 1's, @@ -4547,6 +4661,11 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, return true; switch (Opcode) { + case ISD::VALUETYPE: + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + return true; + case ISD::UNDEF: return PoisonOnly; @@ -4562,9 +4681,9 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, } return true; - // TODO: Search for noundef attributes from library functions. + // TODO: Search for noundef attributes from library functions. - // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. + // TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef. default: // Allow the target to implement this method for its nodes. @@ -4575,7 +4694,94 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op, break; } - return false; + // If Op can't create undef/poison and none of its operands are undef/poison + // then Op is never undef/poison. + // NOTE: TargetNodes should handle this in themselves in + // isGuaranteedNotToBeUndefOrPoisonForTargetNode. + return !canCreateUndefOrPoison(Op, PoisonOnly, /*ConsiderFlags*/ true, + Depth) && + all_of(Op->ops(), [&](SDValue V) { + return isGuaranteedNotToBeUndefOrPoison(V, PoisonOnly, Depth + 1); + }); +} + +bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, bool PoisonOnly, + bool ConsiderFlags, + unsigned Depth) const { + // TODO: Assume we don't know anything for now. + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) + return true; + + APInt DemandedElts = VT.isVector() + ? 
APInt::getAllOnes(VT.getVectorNumElements()) + : APInt(1, 1); + return canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly, ConsiderFlags, + Depth); +} + +bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, + bool PoisonOnly, bool ConsiderFlags, + unsigned Depth) const { + // TODO: Assume we don't know anything for now. + EVT VT = Op.getValueType(); + if (VT.isScalableVector()) + return true; + + unsigned Opcode = Op.getOpcode(); + switch (Opcode) { + case ISD::AssertSext: + case ISD::AssertZext: + case ISD::FREEZE: + case ISD::INSERT_SUBVECTOR: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ROTL: + case ISD::ROTR: + case ISD::FSHL: + case ISD::FSHR: + case ISD::BSWAP: + case ISD::CTPOP: + case ISD::BITREVERSE: + case ISD::PARITY: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::BITCAST: + case ISD::BUILD_VECTOR: + return false; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + // Matches hasPoisonGeneratingFlags(). + return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || + Op->getFlags().hasNoUnsignedWrap()); + + case ISD::SHL: + // If the max shift amount isn't in range, then the shift can create poison. + if (!getValidMaximumShiftAmountConstant(Op, DemandedElts)) + return true; + + // Matches hasPoisonGeneratingFlags(). + return ConsiderFlags && (Op->getFlags().hasNoSignedWrap() || + Op->getFlags().hasNoUnsignedWrap()); + + default: + // Allow the target to implement this method for its nodes. + if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || + Opcode == ISD::INTRINSIC_W_CHAIN || Opcode == ISD::INTRINSIC_VOID) + return TLI->canCreateUndefOrPoisonForTargetNode( + Op, DemandedElts, *this, PoisonOnly, ConsiderFlags, Depth); + break; + } + + // Be conservative and return true. + return true; } bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { @@ -4598,7 +4804,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const if (Depth >= MaxRecursionDepth) return false; // Limit search depth. - // TODO: Handle vectors. // If the value is a constant, we can obviously see if it is a NaN or not. 
if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) { return !C->getValueAPF().isNaN() || @@ -4613,7 +4818,9 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::FDIV: case ISD::FREM: case ISD::FSIN: - case ISD::FCOS: { + case ISD::FCOS: + case ISD::FMA: + case ISD::FMAD: { if (SNaN) return true; // TODO: Need isKnownNeverInfinity @@ -4650,14 +4857,6 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return true; - case ISD::FMA: - case ISD::FMAD: { - if (SNaN) - return true; - return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && - isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && - isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); - } case ISD::FSQRT: // Need is known positive case ISD::FLOG: case ISD::FLOG2: @@ -4696,6 +4895,12 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const case ISD::EXTRACT_VECTOR_ELT: { return isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); } + case ISD::BUILD_VECTOR: { + for (const SDValue &Opnd : Op->ops()) + if (!isKnownNeverNaN(Opnd, SNaN, Depth + 1)) + return false; + return true; + } default: if (Opcode >= ISD::BUILTIN_OP_END || Opcode == ISD::INTRINSIC_WO_CHAIN || @@ -4938,7 +5143,7 @@ static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT, /// Gets or creates the specified node. SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT) { FoldingSetNodeID ID; - AddNodeIDNode(ID, Opcode, getVTList(VT), None); + AddNodeIDNode(ID, Opcode, getVTList(VT), std::nullopt); void *IP = nullptr; if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) return SDValue(E, 0); @@ -4980,7 +5185,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, case ISD::TRUNCATE: if (C->isOpaque()) break; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ZERO_EXTEND: return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, C->isTargetOpcode(), C->isOpaque()); @@ -5166,7 +5371,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, break; case ISD::FREEZE: assert(VT == Operand.getValueType() && "Unexpected VT!"); - if (isGuaranteedNotToBeUndefOrPoison(Operand)) + if (isGuaranteedNotToBeUndefOrPoison(Operand, /*PoisonOnly*/ false, + /*Depth*/ 1)) return Operand; break; case ISD::TokenFactor: @@ -5428,8 +5634,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return V; } -static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, - const APInt &C2) { +static std::optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { switch (Opcode) { case ISD::ADD: return C1 + C2; case ISD::SUB: return C1 - C2; @@ -5505,7 +5711,23 @@ static llvm::Optional<APInt> FoldValue(unsigned Opcode, const APInt &C1, return (C1Ext + C2Ext + 1).extractBits(C1.getBitWidth(), 1); } } - return llvm::None; + return std::nullopt; +} + +// Handle constant folding with UNDEF. +// TODO: Handle more cases. 
+static std::optional<APInt> FoldValueWithUndef(unsigned Opcode, const APInt &C1, + bool IsUndef1, const APInt &C2, + bool IsUndef2) { + if (!(IsUndef1 || IsUndef2)) + return FoldValue(Opcode, C1, C2); + + // Fold and(x, undef) -> 0 + // Fold mul(x, undef) -> 0 + if (Opcode == ISD::AND || Opcode == ISD::MUL) + return APInt::getZero(C1.getBitWidth()); + + return std::nullopt; } SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT, @@ -5581,7 +5803,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, if (C1->isOpaque() || C2->isOpaque()) return SDValue(); - Optional<APInt> FoldAttempt = + std::optional<APInt> FoldAttempt = FoldValue(Opcode, C1->getAPIntValue(), C2->getAPIntValue()); if (!FoldAttempt) return SDValue(); @@ -5608,7 +5830,6 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, ElementCount NumElts = VT.getVectorElementCount(); // See if we can fold through bitcasted integer ops. - // TODO: Can we handle undef elements? if (NumOps == 2 && VT.isFixedLengthVector() && VT.isInteger() && Ops[0].getValueType() == VT && Ops[1].getValueType() == VT && Ops[0].getOpcode() == ISD::BITCAST && @@ -5624,11 +5845,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, SmallVector<APInt> RawBits1, RawBits2; BitVector UndefElts1, UndefElts2; if (BV1->getConstantRawBits(IsLE, EltBits, RawBits1, UndefElts1) && - BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2) && - UndefElts1.none() && UndefElts2.none()) { + BV2->getConstantRawBits(IsLE, EltBits, RawBits2, UndefElts2)) { SmallVector<APInt> RawBits; for (unsigned I = 0, E = NumElts.getFixedValue(); I != E; ++I) { - Optional<APInt> Fold = FoldValue(Opcode, RawBits1[I], RawBits2[I]); + std::optional<APInt> Fold = FoldValueWithUndef( + Opcode, RawBits1[I], UndefElts1[I], RawBits2[I], UndefElts2[I]); if (!Fold) break; RawBits.push_back(*Fold); @@ -5823,7 +6044,7 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL, if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, /*AllowUndefs*/ true)) if (N1C && N1C->getValueAPF().isNegZero() && N2.isUndef()) return getUNDEF(VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::FADD: case ISD::FMUL: @@ -5882,11 +6103,11 @@ void SelectionDAG::canonicalizeCommutativeBinop(unsigned Opcode, SDValue &N1, // Canonicalize: // binop(const, nonconst) -> binop(nonconst, const) - bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1); - bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2); - bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1); - bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2); - if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP)) + SDNode *N1C = isConstantIntBuildVectorOrConstantInt(N1); + SDNode *N2C = isConstantIntBuildVectorOrConstantInt(N2); + SDNode *N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + SDNode *N2CFP = isConstantFPBuildVectorOrConstantFP(N2); + if ((N1C && !N2C) || (N1CFP && !N2CFP)) std::swap(N1, N2); // Canonicalize: @@ -5995,6 +6216,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, return getNode(ISD::AND, DL, VT, N1, getNOT(DL, N2, VT)); } break; + case ISD::ABDS: + case ISD::ABDU: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; case ISD::SMIN: case ISD::UMAX: assert(VT.isInteger() && "This operator does not apply to FP types!"); @@ -6034,12 +6261,12 @@ SDValue 
SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, const APInt &ShiftImm = N2C->getAPIntValue(); return getVScale(DL, VT, MulImm << ShiftImm); } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SRA: case ISD::SRL: if (SDValue V = simplifyShift(N1, N2)) return V; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ROTL: case ISD::ROTR: assert(VT == N1.getValueType() && @@ -6329,7 +6556,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, // Handle undef ^ undef -> 0 special case. This is a common // idiom (misuse). return getConstant(0, DL, VT); - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ADD: case ISD::SUB: case ISD::UDIV: @@ -6484,6 +6711,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, "Dest and insert subvector source types must match!"); assert(VT.isVector() && N2VT.isVector() && "Insert subvector VTs must be vectors!"); + assert(VT.getVectorElementType() == N2VT.getVectorElementType() && + "Insert subvector VTs must have the same element type!"); assert((VT.isScalableVector() || N2VT.isFixedLengthVector()) && "Cannot insert a scalable vector into a fixed length vector!"); assert((VT.isScalableVector() != N2VT.isScalableVector() || @@ -6674,10 +6903,10 @@ SDValue SelectionDAG::getMemBasePlusOffset(SDValue Base, TypeSize Offset, if (Offset.isScalable()) Index = getVScale(DL, Base.getValueType(), - APInt(Base.getValueSizeInBits().getFixedSize(), - Offset.getKnownMinSize())); + APInt(Base.getValueSizeInBits().getFixedValue(), + Offset.getKnownMinValue())); else - Index = getConstant(Offset.getFixedSize(), DL, VT); + Index = getConstant(Offset.getFixedValue(), DL, VT); return getMemBasePlusOffset(Base, Index, DL, Flags); } @@ -6794,7 +7023,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, Align NewAlign = DL.getABITypeAlign(Ty); // Don't promote to an alignment that would require dynamic stack - // realignment. + // realignment which may conflict with optimizations such as tail call + // optimization. const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); if (!TRI->hasStackRealignment(MF)) while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) @@ -6986,6 +7216,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(C); Align NewAlign = DL.getABITypeAlign(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment which may conflict with optimizations such as tail call + // optimization. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->hasStackRealignment(MF)) + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign.previous(); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) @@ -7094,7 +7333,17 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl, if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); - Align NewAlign = DAG.getDataLayout().getABITypeAlign(Ty); + const DataLayout &DL = DAG.getDataLayout(); + Align NewAlign = DL.getABITypeAlign(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment which may conflict with optimizations such as tail call + // optimization. 
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->hasStackRealignment(MF)) + while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign)) + NewAlign = NewAlign.previous(); + if (NewAlign > Alignment) { // Give the stack frame object a larger alignment if needed. if (MFI.getObjectAlign(FI->getIndex()) < NewAlign) @@ -7562,6 +7811,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, Opcode == ISD::ATOMIC_LOAD_FSUB || Opcode == ISD::ATOMIC_LOAD_FMAX || Opcode == ISD::ATOMIC_LOAD_FMIN || + Opcode == ISD::ATOMIC_LOAD_UINC_WRAP || + Opcode == ISD::ATOMIC_LOAD_UDEC_WRAP || Opcode == ISD::ATOMIC_SWAP || Opcode == ISD::ATOMIC_STORE) && "Invalid Atomic Op"); @@ -8816,12 +9067,12 @@ SDValue SelectionDAG::simplifySelect(SDValue Cond, SDValue T, SDValue F) { if (auto *CondC = dyn_cast<ConstantSDNode>(Cond)) return CondC->isZero() ? F : T; - // TODO: This should simplify VSELECT with constant condition using something - // like this (but check boolean contents to be complete?): - // if (ISD::isBuildVectorAllOnes(Cond.getNode())) - // return T; - // if (ISD::isBuildVectorAllZeros(Cond.getNode())) - // return F; + // TODO: This should simplify VSELECT with non-zero constant condition using + // something like this (but check boolean contents to be complete?): + if (ConstantSDNode *CondC = isConstOrConstSplat(Cond, /*AllowUndefs*/ false, + /*AllowTruncation*/ true)) + if (CondC->isZero()) + return F; // select ?, T, T --> T if (T == F) @@ -9177,7 +9428,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList) { - return getNode(Opcode, DL, VTList, None); + return getNode(Opcode, DL, VTList, std::nullopt); } SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList, @@ -9444,7 +9695,7 @@ void SelectionDAG::setNodeMemRefs(MachineSDNode *N, SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT) { SDVTList VTs = getVTList(VT); - return SelectNodeTo(N, MachineOpc, VTs, None); + return SelectNodeTo(N, MachineOpc, VTs, std::nullopt); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -9485,7 +9736,7 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); - return SelectNodeTo(N, MachineOpc, VTs, None); + return SelectNodeTo(N, MachineOpc, VTs, std::nullopt); } SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, @@ -9652,7 +9903,7 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) { MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT) { SDVTList VTs = getVTList(VT); - return getMachineNode(Opcode, dl, VTs, None); + return getMachineNode(Opcode, dl, VTs, std::nullopt); } MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl, @@ -10091,6 +10342,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { // Preserve Debug Values transferDbgValues(FromN, To); + // Preserve extra info. + copyExtraInfo(From, To.getNode()); // Iterate over all the existing uses of From. New uses will be added // to the beginning of the use list, which we avoid visiting. 
@@ -10152,6 +10405,8 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { assert((i < To->getNumValues()) && "Invalid To location"); transferDbgValues(SDValue(From, i), SDValue(To, i)); } + // Preserve extra info. + copyExtraInfo(From, To); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -10194,9 +10449,12 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { if (From->getNumValues() == 1) // Handle the simple case efficiently. return ReplaceAllUsesWith(SDValue(From, 0), To[0]); - // Preserve Debug Info. - for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) { + // Preserve Debug Info. transferDbgValues(SDValue(From, i), To[i]); + // Preserve extra info. + copyExtraInfo(From, To[i].getNode()); + } // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -10249,6 +10507,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ // Preserve Debug Info. transferDbgValues(From, To); + copyExtraInfo(From.getNode(), To.getNode()); // Iterate over just the existing users of From. See the comments in // the ReplaceAllUsesWith above. @@ -10402,6 +10661,7 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, return ReplaceAllUsesOfValueWith(*From, *To); transferDbgValues(*From, *To); + copyExtraInfo(From->getNode(), To->getNode()); // Read up all the uses and make records of them. This helps // processing new uses that are introduced during the @@ -10636,6 +10896,67 @@ bool llvm::isMinSignedConstant(SDValue V) { return Const != nullptr && Const->isMinSignedValue(); } +bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V, + unsigned OperandNo) { + // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity(). + // TODO: Target-specific opcodes could be added. + if (auto *Const = isConstOrConstSplat(V)) { + switch (Opcode) { + case ISD::ADD: + case ISD::OR: + case ISD::XOR: + case ISD::UMAX: + return Const->isZero(); + case ISD::MUL: + return Const->isOne(); + case ISD::AND: + case ISD::UMIN: + return Const->isAllOnes(); + case ISD::SMAX: + return Const->isMinSignedValue(); + case ISD::SMIN: + return Const->isMaxSignedValue(); + case ISD::SUB: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + return OperandNo == 1 && Const->isZero(); + case ISD::UDIV: + case ISD::SDIV: + return OperandNo == 1 && Const->isOne(); + } + } else if (auto *ConstFP = isConstOrConstSplatFP(V)) { + switch (Opcode) { + case ISD::FADD: + return ConstFP->isZero() && + (Flags.hasNoSignedZeros() || ConstFP->isNegative()); + case ISD::FSUB: + return OperandNo == 1 && ConstFP->isZero() && + (Flags.hasNoSignedZeros() || !ConstFP->isNegative()); + case ISD::FMUL: + return ConstFP->isExactlyValue(1.0); + case ISD::FDIV: + return OperandNo == 1 && ConstFP->isExactlyValue(1.0); + case ISD::FMINNUM: + case ISD::FMAXNUM: { + // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. + EVT VT = V.getValueType(); + const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT); + APFloat NeutralAF = !Flags.hasNoNaNs() + ? APFloat::getQNaN(Semantics) + : !Flags.hasNoInfs() + ? 
APFloat::getInf(Semantics) + : APFloat::getLargest(Semantics); + if (Opcode == ISD::FMAXNUM) + NeutralAF.changeSign(); + + return ConstFP->isExactlyValue(NeutralAF); + } + } + } + return false; +} + SDValue llvm::peekThroughBitcasts(SDValue V) { while (V.getOpcode() == ISD::BITCAST) V = V.getOperand(0); @@ -10666,6 +10987,16 @@ bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) { ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, bool AllowTruncation) { + EVT VT = N.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? APInt::getAllOnes(VT.getVectorMinNumElements()) + : APInt(1, 1); + return isConstOrConstSplat(N, DemandedElts, AllowUndefs, AllowTruncation); +} + +ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, + bool AllowUndefs, + bool AllowTruncation) { if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) return CN; @@ -10683,34 +11014,11 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs, if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { BitVector UndefElements; - ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); - - // BuildVectors can truncate their operands. Ignore that case here unless - // AllowTruncation is set. - if (CN && (UndefElements.none() || AllowUndefs)) { - EVT CVT = CN->getValueType(0); - EVT NSVT = N.getValueType().getScalarType(); - assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension"); - if (AllowTruncation || (CVT == NSVT)) - return CN; - } - } - - return nullptr; -} - -ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, - bool AllowUndefs, - bool AllowTruncation) { - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) - return CN; - - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements); // BuildVectors can truncate their operands. Ignore that case here unless // AllowTruncation is set. + // TODO: Look into whether we should allow UndefElements in non-DemandedElts if (CN && (UndefElements.none() || AllowUndefs)) { EVT CVT = CN->getValueType(0); EVT NSVT = N.getValueType().getScalarType(); @@ -10724,21 +11032,11 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts, } ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) { - if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) - return CN; - - if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - BitVector UndefElements; - ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); - if (CN && (UndefElements.none() || AllowUndefs)) - return CN; - } - - if (N.getOpcode() == ISD::SPLAT_VECTOR) - if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0))) - return CN; - - return nullptr; + EVT VT = N.getValueType(); + APInt DemandedElts = VT.isFixedLengthVector() + ? 
APInt::getAllOnes(VT.getVectorMinNumElements()) + : APInt(1, 1); + return isConstOrConstSplatFP(N, DemandedElts, AllowUndefs); } ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, @@ -10751,10 +11049,15 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, BitVector UndefElements; ConstantFPSDNode *CN = BV->getConstantFPSplatNode(DemandedElts, &UndefElements); + // TODO: Look into whether we should allow UndefElements in non-DemandedElts if (CN && (UndefElements.none() || AllowUndefs)) return CN; } + if (N.getOpcode() == ISD::SPLAT_VECTOR) + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N.getOperand(0))) + return CN; + return nullptr; } @@ -10808,7 +11111,7 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, // the MMO. This is because the MMO might indicate only a possible address // range instead of specifying the affected memory addresses precisely. // TODO: Make MachineMemOperands aware of scalable vectors. - assert(memvt.getStoreSize().getKnownMinSize() <= MMO->getSize() && + assert(memvt.getStoreSize().getKnownMinValue() <= MMO->getSize() && "Size mismatch!"); } @@ -11221,7 +11524,7 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; if (LD->getChain() != Base->getChain()) return false; - EVT VT = LD->getValueType(0); + EVT VT = LD->getMemoryVT(); if (VT.getSizeInBits() / 8 != Bytes) return false; @@ -11234,8 +11537,8 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD, return false; } -/// InferPtrAlignment - Infer alignment of a load / store address. Return None -/// if it cannot be inferred. +/// InferPtrAlignment - Infer alignment of a load / store address. Return +/// std::nullopt if it cannot be inferred. MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV = nullptr; @@ -11267,7 +11570,7 @@ MaybeAlign SelectionDAG::InferPtrAlign(SDValue Ptr) const { return commonAlignment(MFI.getObjectAlign(FrameIdx), FrameOffset); } - return None; + return std::nullopt; } /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type @@ -11690,30 +11993,30 @@ bool BuildVectorSDNode::isConstant() const { return true; } -Optional<std::pair<APInt, APInt>> +std::optional<std::pair<APInt, APInt>> BuildVectorSDNode::isConstantSequence() const { unsigned NumOps = getNumOperands(); if (NumOps < 2) - return None; + return std::nullopt; if (!isa<ConstantSDNode>(getOperand(0)) || !isa<ConstantSDNode>(getOperand(1))) - return None; + return std::nullopt; unsigned EltSize = getValueType(0).getScalarSizeInBits(); APInt Start = getConstantOperandAPInt(0).trunc(EltSize); APInt Stride = getConstantOperandAPInt(1).trunc(EltSize) - Start; if (Stride.isZero()) - return None; + return std::nullopt; for (unsigned i = 2; i < NumOps; ++i) { if (!isa<ConstantSDNode>(getOperand(i))) - return None; + return std::nullopt; APInt Val = getConstantOperandAPInt(i).trunc(EltSize); if (Val != (Start + (Stride * i))) - return None; + return std::nullopt; } return std::make_pair(Start, Stride); @@ -11847,6 +12150,18 @@ SDValue SelectionDAG::getNeutralElement(unsigned Opcode, const SDLoc &DL, } } +void SelectionDAG::copyExtraInfo(SDNode *From, SDNode *To) { + assert(From && To && "Invalid SDNode; empty source SDValue?"); + auto I = SDEI.find(From); + if (I == SDEI.end()) + return; + + // Use of operator[] on the DenseMap may cause an insertion, which invalidates + // the iterator, hence the need to make a copy to prevent a use-after-free. 
+ NodeExtraInfo Copy = I->second; + SDEI[To] = std::move(Copy); +} + #ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSetImpl<const SDNode*> &Visited, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp index d236433f6fb4..a432d8e92bca 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp @@ -85,9 +85,9 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other, } bool BaseIndexOffset::computeAliasing(const SDNode *Op0, - const Optional<int64_t> NumBytes0, + const std::optional<int64_t> NumBytes0, const SDNode *Op1, - const Optional<int64_t> NumBytes1, + const std::optional<int64_t> NumBytes1, const SelectionDAG &DAG, bool &IsAlias) { BaseIndexOffset BasePtr0 = match(Op0, DAG); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 35650b9bd00e..0bdfdac6a65f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,8 +15,6 @@ #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -27,10 +25,12 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCMetadata.h" @@ -63,6 +63,7 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" @@ -99,6 +100,7 @@ #include <cstddef> #include <iterator> #include <limits> +#include <optional> #include <tuple> using namespace llvm; @@ -148,18 +150,18 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional<CallingConv::ID> CC); + std::optional<CallingConv::ID> CC); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger than ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). 
-static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, - const SDValue *Parts, unsigned NumParts, - MVT PartVT, EVT ValueVT, const Value *V, - Optional<CallingConv::ID> CC = None, - Optional<ISD::NodeType> AssertOp = None) { +static SDValue +getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, + std::optional<CallingConv::ID> CC = std::nullopt, + std::optional<ISD::NodeType> AssertOp = std::nullopt) { // Let the target assemble the parts if it wants to const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (SDValue Val = TLI.joinRegisterPartsIntoValue(DAG, DL, Parts, NumParts, @@ -180,8 +182,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL, unsigned ValueBits = ValueVT.getSizeInBits(); // Assemble the power of 2 part. - unsigned RoundParts = - (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts; + unsigned RoundParts = llvm::bit_floor(NumParts); unsigned RoundBits = PartBits * RoundParts; EVT RoundVT = RoundBits == ValueBits ? ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); @@ -320,7 +321,7 @@ static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, - Optional<CallingConv::ID> CallConv) { + std::optional<CallingConv::ID> CallConv) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const bool IsABIRegCopy = CallConv.has_value(); @@ -397,10 +398,9 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - // If the element type of the source/dest vectors are the same, but the - // parts vector has more elements than the value vector, then we have a - // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the - // elements we want. + // If the parts vector has more elements than the value vector, then we + // have a vector widening case (e.g. <2 x float> -> <4 x float>). + // Extract the elements we want. if (PartEVT.getVectorElementCount() != ValueVT.getVectorElementCount()) { assert((PartEVT.getVectorElementCount().getKnownMinValue() > ValueVT.getVectorElementCount().getKnownMinValue()) && @@ -414,6 +414,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, DAG.getVectorIdxConstant(0, DL)); if (PartEVT == ValueVT) return Val; + if (PartEVT.isInteger() && ValueVT.isFloatingPoint()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); } // Promoted vector extract @@ -447,12 +449,22 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, // Handle cases such as i8 -> <1 x i1> EVT ValueSVT = ValueVT.getVectorElementType(); if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT) { - if (ValueSVT.getSizeInBits() == PartEVT.getSizeInBits()) + unsigned ValueSize = ValueSVT.getSizeInBits(); + if (ValueSize == PartEVT.getSizeInBits()) { Val = DAG.getNode(ISD::BITCAST, DL, ValueSVT, Val); - else + } else if (ValueSVT.isFloatingPoint() && PartEVT.isInteger()) { + // It's possible a scalar floating point type gets softened to integer and + // then promoted to a larger integer. If PartEVT is the larger integer + // we need to truncate it and then bitcast to the FP type. 
+ assert(ValueSVT.bitsLT(PartEVT) && "Unexpected types"); + EVT IntermediateType = EVT::getIntegerVT(*DAG.getContext(), ValueSize); + Val = DAG.getNode(ISD::TRUNCATE, DL, IntermediateType, Val); + Val = DAG.getBitcast(ValueSVT, Val); + } else { Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT) : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT); + } } return DAG.getBuildVector(ValueVT, DL, Val); @@ -461,16 +473,16 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL, static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - Optional<CallingConv::ID> CallConv); + std::optional<CallingConv::ID> CallConv); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. -static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, - SDValue *Parts, unsigned NumParts, MVT PartVT, - const Value *V, - Optional<CallingConv::ID> CallConv = None, - ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { +static void +getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, + unsigned NumParts, MVT PartVT, const Value *V, + std::optional<CallingConv::ID> CallConv = std::nullopt, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { // Let the target split the parts if it wants to const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLI.splitValueIntoRegisterParts(DAG, DL, Val, Parts, NumParts, PartVT, @@ -555,7 +567,7 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, // The number of parts is not a power of 2. Split off and copy the tail. assert(PartVT.isInteger() && ValueVT.isInteger() && "Do not know what to expand to!"); - unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundParts = llvm::bit_floor(NumParts); unsigned RoundBits = RoundParts * PartBits; unsigned OddParts = NumParts - RoundParts; SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, @@ -643,7 +655,7 @@ static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val, static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, - Optional<CallingConv::ID> CallConv) { + std::optional<CallingConv::ID> CallConv) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -678,7 +690,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, SDValue Widened = widenVectorToPartType(DAG, Val, DL, WidenVT); Val = DAG.getAnyExtOrTrunc(Widened, DL, PartVT); } else { - if (ValueVT.getVectorElementCount().isScalar()) { + // Don't extract an integer from a float vector. This can happen if the + // FP type gets softened to integer and then promoted. The promotion + // prevents it from being picked up by the earlier bitcast case. 
+ if (ValueVT.getVectorElementCount().isScalar() && + (!ValueVT.isFloatingPoint() || !PartVT.isInteger())) { Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, DAG.getVectorIdxConstant(0, DL)); } else { @@ -703,8 +719,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, unsigned NumRegs; if (IsABIRegCopy) { NumRegs = TLI.getVectorTypeBreakdownForCallingConv( - *DAG.getContext(), CallConv.value(), ValueVT, IntermediateVT, - NumIntermediates, RegisterVT); + *DAG.getContext(), *CallConv, ValueVT, IntermediateVT, NumIntermediates, + RegisterVT); } else { NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -718,7 +734,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, assert(IntermediateVT.isScalableVector() == ValueVT.isScalableVector() && "Mixing scalable and fixed vectors when copying in parts"); - Optional<ElementCount> DestEltCnt; + std::optional<ElementCount> DestEltCnt; if (IntermediateVT.isVector()) DestEltCnt = IntermediateVT.getVectorElementCount() * NumIntermediates; @@ -786,13 +802,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL, } RegsForValue::RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, - EVT valuevt, Optional<CallingConv::ID> CC) + EVT valuevt, std::optional<CallingConv::ID> CC) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs), RegCount(1, regs.size()), CallConv(CC) {} RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, - Optional<CallingConv::ID> CC) { + std::optional<CallingConv::ID> CC) { ComputeValueVTs(TLI, DL, Ty, ValueVTs); CallConv = CC; @@ -800,11 +816,11 @@ RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI, for (EVT ValueVT : ValueVTs) { unsigned NumRegs = isABIMangled() - ? TLI.getNumRegistersForCallingConv(Context, CC.value(), ValueVT) + ? TLI.getNumRegistersForCallingConv(Context, *CC, ValueVT) : TLI.getNumRegisters(Context, ValueVT); MVT RegisterVT = isABIMangled() - ? TLI.getRegisterTypeForCallingConv(Context, CC.value(), ValueVT) + ? TLI.getRegisterTypeForCallingConv(Context, *CC, ValueVT) : TLI.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); @@ -831,10 +847,10 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = RegCount[Value]; - MVT RegisterVT = - isABIMangled() ? TLI.getRegisterTypeForCallingConv( - *DAG.getContext(), CallConv.value(), RegVTs[Value]) - : RegVTs[Value]; + MVT RegisterVT = isABIMangled() + ? TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), *CallConv, RegVTs[Value]) + : RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -914,10 +930,10 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumParts = RegCount[Value]; - MVT RegisterVT = - isABIMangled() ? TLI.getRegisterTypeForCallingConv( - *DAG.getContext(), CallConv.value(), RegVTs[Value]) - : RegVTs[Value]; + MVT RegisterVT = isABIMangled() + ? 
TLI.getRegisterTypeForCallingConv( + *DAG.getContext(), *CallConv, RegVTs[Value]) + : RegVTs[Value]; if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) ExtendKind = ISD::ZERO_EXTEND; @@ -1025,8 +1041,10 @@ RegsForValue::getRegsAndSizes() const { } void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa, + AssumptionCache *ac, const TargetLibraryInfo *li) { AA = aa; + AC = ac; GFI = gfi; LibInfo = li; Context = DAG.getContext(); @@ -1117,18 +1135,57 @@ void SelectionDAGBuilder::visit(const Instruction &I) { HandlePHINodesInSuccessorBlocks(I.getParent()); } + // Add SDDbgValue nodes for any var locs here. Do so before updating + // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}. + if (FunctionVarLocs const *FnVarLocs = DAG.getFunctionVarLocs()) { + // Add SDDbgValue nodes for any var locs here. Do so before updating + // SDNodeOrder, as this mapping is {Inst -> Locs BEFORE Inst}. + for (auto It = FnVarLocs->locs_begin(&I), End = FnVarLocs->locs_end(&I); + It != End; ++It) { + auto *Var = FnVarLocs->getDILocalVariable(It->VariableID); + dropDanglingDebugInfo(Var, It->Expr); + if (!handleDebugValue(It->V, Var, It->Expr, It->DL, SDNodeOrder, + /*IsVariadic=*/false)) + addDanglingDebugInfo(It, SDNodeOrder); + } + } + // Increase the SDNodeOrder if dealing with a non-debug instruction. if (!isa<DbgInfoIntrinsic>(I)) ++SDNodeOrder; CurInst = &I; + // Set inserted listener only if required. + bool NodeInserted = false; + std::unique_ptr<SelectionDAG::DAGNodeInsertedListener> InsertedListener; + MDNode *PCSectionsMD = I.getMetadata(LLVMContext::MD_pcsections); + if (PCSectionsMD) { + InsertedListener = std::make_unique<SelectionDAG::DAGNodeInsertedListener>( + DAG, [&](SDNode *) { NodeInserted = true; }); + } + visit(I.getOpcode(), I); if (!I.isTerminator() && !HasTailCall && !isa<GCStatepointInst>(I)) // statepoints handle their exports internally CopyToExportRegsIfNeeded(&I); + // Handle metadata. + if (PCSectionsMD) { + auto It = NodeMap.find(&I); + if (It != NodeMap.end()) { + DAG.addPCSections(It->second.getNode(), PCSectionsMD); + } else if (NodeInserted) { + // This should not happen; if it does, don't let it go unnoticed so we can + // fix it. Relevant visit*() function is probably missing a setValue(). + errs() << "warning: loosing !pcsections metadata [" + << I.getModule()->getName() << "]\n"; + LLVM_DEBUG(I.dump()); + assert(false); + } + } + CurInst = nullptr; } @@ -1148,8 +1205,13 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +void SelectionDAGBuilder::addDanglingDebugInfo(const VarLocInfo *VarLoc, + unsigned Order) { + DanglingDebugInfoMap[VarLoc->V].emplace_back(VarLoc, Order); +} + void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI, - DebugLoc DL, unsigned Order) { + unsigned Order) { // We treat variadic dbg_values differently at this stage. if (DI->hasArgList()) { // For variadic dbg_values we will now insert an undef. 
@@ -1161,7 +1223,7 @@ void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI, } SDDbgValue *SDV = DAG.getDbgValueList( DI->getVariable(), DI->getExpression(), Locs, {}, - /*IsIndirect=*/false, DL, Order, /*IsVariadic=*/true); + /*IsIndirect=*/false, DI->getDebugLoc(), Order, /*IsVariadic=*/true); DAG.AddDbgValue(SDV, /*isParameter=*/false); } else { // TODO: Dangling debug info will eventually either be resolved or produce @@ -1171,18 +1233,18 @@ void SelectionDAGBuilder::addDanglingDebugInfo(const DbgValueInst *DI, assert(DI->getNumVariableLocationOps() == 1 && "DbgValueInst without an ArgList should have a single location " "operand."); - DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, DL, Order); + DanglingDebugInfoMap[DI->getValue(0)].emplace_back(DI, Order); } } void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable, const DIExpression *Expr) { auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) { - const DbgValueInst *DI = DDI.getDI(); - DIVariable *DanglingVariable = DI->getVariable(); - DIExpression *DanglingExpr = DI->getExpression(); + DIVariable *DanglingVariable = DDI.getVariable(DAG.getFunctionVarLocs()); + DIExpression *DanglingExpr = DDI.getExpression(); if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) { - LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << printDDI(DDI) + << "\n"); return true; } return false; @@ -1211,15 +1273,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second; for (auto &DDI : DDIV) { - const DbgValueInst *DI = DDI.getDI(); - assert(!DI->hasArgList() && "Not implemented for variadic dbg_values"); - assert(DI && "Ill-formed DanglingDebugInfo"); - DebugLoc dl = DDI.getdl(); + DebugLoc DL = DDI.getDebugLoc(); unsigned ValSDNodeOrder = Val.getNode()->getIROrder(); unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); - DILocalVariable *Variable = DI->getVariable(); - DIExpression *Expr = DI->getExpression(); - assert(Variable->isValidLocationForIntrinsic(dl) && + DILocalVariable *Variable = DDI.getVariable(DAG.getFunctionVarLocs()); + DIExpression *Expr = DDI.getExpression(); + assert(Variable->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); SDDbgValue *SDV; if (Val.getNode()) { @@ -1229,10 +1288,10 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, // in the first place we should not be more successful here). Unless we // have some test case that prove this to be correct we should avoid // calling EmitFuncArgumentDbgValue here. 
- if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, DL, FuncArgumentDbgValueKind::Value, Val)) { - LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order=" - << DbgSDNodeOrder << "] for:\n " << *DI << "\n"); + LLVM_DEBUG(dbgs() << "Resolve dangling debug info for " << printDDI(DDI) + << "\n"); LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump()); // Increase the SDNodeOrder for the DbgValue here to make sure it is // inserted after the definition of Val when emitting the instructions @@ -1241,17 +1300,17 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs() << "changing SDNodeOrder from " << DbgSDNodeOrder << " to " << ValSDNodeOrder << "\n"); - SDV = getDbgValue(Val, Variable, Expr, dl, + SDV = getDbgValue(Val, Variable, Expr, DL, std::max(DbgSDNodeOrder, ValSDNodeOrder)); DAG.AddDbgValue(SDV, false); } else - LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI - << "in EmitFuncArgumentDbgValue\n"); + LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " + << printDDI(DDI) << " in EmitFuncArgumentDbgValue\n"); } else { - LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); - auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType()); + LLVM_DEBUG(dbgs() << "Dropping debug info for " << printDDI(DDI) << "\n"); + auto Undef = UndefValue::get(V->getType()); auto SDV = - DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder); + DAG.getConstantDbgValue(Variable, Expr, Undef, DL, DbgSDNodeOrder); DAG.AddDbgValue(SDV, false); } } @@ -1263,21 +1322,19 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // state of `handleDebugValue`, we need know specifically which values were // invalid, so that we attempt to salvage only those values when processing // a DIArgList. - assert(!DDI.getDI()->hasArgList() && - "Not implemented for variadic dbg_values"); - Value *V = DDI.getDI()->getValue(0); - DILocalVariable *Var = DDI.getDI()->getVariable(); - DIExpression *Expr = DDI.getDI()->getExpression(); - DebugLoc DL = DDI.getdl(); - DebugLoc InstDL = DDI.getDI()->getDebugLoc(); + Value *V = DDI.getVariableLocationOp(0); + Value *OrigV = V; + DILocalVariable *Var = DDI.getVariable(DAG.getFunctionVarLocs()); + DIExpression *Expr = DDI.getExpression(); + DebugLoc DL = DDI.getDebugLoc(); unsigned SDOrder = DDI.getSDNodeOrder(); + // Currently we consider only dbg.value intrinsics -- we tell the salvager // that DW_OP_stack_value is desired. - assert(isa<DbgValueInst>(DDI.getDI())); bool StackValue = true; // Can this Value can be encoded without any further work? - if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, /*IsVariadic=*/false)) + if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) return; // Attempt to salvage back through as many instructions as possible. Bail if @@ -1306,10 +1363,10 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // Some kind of simplification occurred: check whether the operand of the // salvaged debug expression can be encoded in this DAG. 
- if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder, - /*IsVariadic=*/false)) { - LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n " - << *DDI.getDI() << "\nBy stripping back to:\n " << *V); + if (handleDebugValue(V, Var, Expr, DL, SDOrder, /*IsVariadic=*/false)) { + LLVM_DEBUG( + dbgs() << "Salvaged debug location info for:\n " << *Var << "\n" + << *OrigV << "\nBy stripping back to:\n " << *V << "\n"); return; } } @@ -1317,21 +1374,18 @@ void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) { // This was the final opportunity to salvage this debug information, and it // couldn't be done. Place an undef DBG_VALUE at this location to terminate // any earlier variable location. - auto Undef = UndefValue::get(DDI.getDI()->getValue(0)->getType()); - auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); + assert(OrigV && "V shouldn't be null"); + auto *Undef = UndefValue::get(OrigV->getType()); + auto *SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder); DAG.AddDbgValue(SDV, false); - - LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << *DDI.getDI() - << "\n"); - LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0) + LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << printDDI(DDI) << "\n"); } bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var, - DIExpression *Expr, DebugLoc dl, - DebugLoc InstDL, unsigned Order, - bool IsVariadic) { + DIExpression *Expr, DebugLoc DbgLoc, + unsigned Order, bool IsVariadic) { if (Values.empty()) return true; SmallVector<SDDbgOperand> LocationOps; @@ -1344,6 +1398,13 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, continue; } + // Look through IntToPtr constants. + if (auto *CE = dyn_cast<ConstantExpr>(V)) + if (CE->getOpcode() == Instruction::IntToPtr) { + LocationOps.emplace_back(SDDbgOperand::fromConst(CE->getOperand(0))); + continue; + } + // If the Value is a frame index, we can create a FrameIndex debug value // without relying on the DAG at all. if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { @@ -1362,7 +1423,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, if (N.getNode()) { // Only emit func arg dbg value for non-variadic dbg.values for now. if (!IsVariadic && - EmitFuncArgumentDbgValue(V, Var, Expr, dl, + EmitFuncArgumentDbgValue(V, Var, Expr, DbgLoc, FuncArgumentDbgValueKind::Value, N)) return true; if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) { @@ -1391,7 +1452,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, // they're parameters, and they are parameters of the current function. We // need to let them dangle until they get an SDNode. bool IsParamOfFunc = - isa<Argument>(V) && Var->isParameter() && !InstDL.getInlinedAt(); + isa<Argument>(V) && Var->isParameter() && !DbgLoc.getInlinedAt(); if (IsParamOfFunc) return false; @@ -1404,7 +1465,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, // If this is a PHI node, it may be split up into several MI PHI nodes // (in FunctionLoweringInfo::set). RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, - V->getType(), None); + V->getType(), std::nullopt); if (RFV.occupiesMultipleRegs()) { // FIXME: We could potentially support variadic dbg_values here. 
if (IsVariadic) @@ -1429,7 +1490,7 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, if (!FragmentExpr) continue; SDDbgValue *SDV = DAG.getVRegDbgValue( - Var, *FragmentExpr, RegAndSize.first, false, dl, SDNodeOrder); + Var, *FragmentExpr, RegAndSize.first, false, DbgLoc, SDNodeOrder); DAG.AddDbgValue(SDV, false); Offset += RegisterSize; } @@ -1446,9 +1507,9 @@ bool SelectionDAGBuilder::handleDebugValue(ArrayRef<const Value *> Values, // We have created a SDDbgOperand for each Value in Values. // Should use Order instead of SDNodeOrder? assert(!LocationOps.empty()); - SDDbgValue *SDV = - DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies, - /*IsIndirect=*/false, dl, SDNodeOrder, IsVariadic); + SDDbgValue *SDV = DAG.getDbgValueList(Var, Expr, LocationOps, Dependencies, + /*IsIndirect=*/false, DbgLoc, + SDNodeOrder, IsVariadic); DAG.AddDbgValue(SDV, /*isParameter=*/false); return true; } @@ -1472,7 +1533,7 @@ SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Ty, - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); @@ -1647,12 +1708,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { else Op = DAG.getConstant(0, getCurSDLoc(), EltVT); - if (isa<ScalableVectorType>(VecTy)) - return NodeMap[V] = DAG.getSplatVector(VT, getCurSDLoc(), Op); - - SmallVector<SDValue, 16> Ops; - Ops.assign(cast<FixedVectorType>(VecTy)->getNumElements(), Op); - return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops); + return NodeMap[V] = DAG.getSplat(VT, getCurSDLoc(), Op); } llvm_unreachable("Unknown vector constant"); @@ -1664,16 +1720,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap<const AllocaInst*, int>::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, - TLI.getFrameIndexTy(DAG.getDataLayout())); + return DAG.getFrameIndex( + SI->second, TLI.getValueType(DAG.getDataLayout(), AI->getType())); } // If this is an instruction which fast-isel has deferred, select it now. if (const Instruction *Inst = dyn_cast<Instruction>(V)) { - unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + Register InReg = FuncInfo.InitializeRegForValue(Inst); RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, - Inst->getType(), None); + Inst->getType(), std::nullopt); SDValue Chain = DAG.getEntryNode(); return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); } @@ -2082,7 +2138,7 @@ void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { // Already exported? if (FuncInfo.isExportedInst(V)) return; - unsigned Reg = FuncInfo.InitializeRegForValue(V); + Register Reg = FuncInfo.InitializeRegForValue(V); CopyValueToVirtualRegister(V, Reg); } @@ -2536,6 +2592,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); + setValue(CurInst, BrCond); + // Insert the false branch. Do this even if it's a fall through branch, // this makes it easier to do DAG optimizations which require inverting // the branch condition. 
@@ -2746,7 +2804,8 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { CallOptions.setDiscardResult(true); SDValue Chain = TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, - None, CallOptions, getCurSDLoc()).second; + std::nullopt, CallOptions, getCurSDLoc()) + .second; // On PS4/PS5, the "return address" must still be within the calling // function, even if it's at the very end, so emit an explicit TRAP here. // Passing 'true' for doesNotReturn above won't generate the trap for us. @@ -2835,7 +2894,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, MVT VT = BB.RegVT; SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); SDValue Cmp; - unsigned PopCount = countPopulation(B.Mask); + unsigned PopCount = llvm::popcount(B.Mask); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (PopCount == 1) { // Testing for a single bit; just compare the shift count with what it @@ -3000,7 +3059,8 @@ void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) { BasicBlock *Dest = I.getIndirectDest(i); MachineBasicBlock *Target = FuncInfo.MBBMap[Dest]; Target->setIsInlineAsmBrIndirectTarget(); - Target->setHasAddressTaken(); + Target->setMachineBlockAddressTaken(); + Target->setLabelMustBeEmitted(); // Don't add duplicate machine successors. if (Dests.insert(Dest).second) addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); @@ -3279,7 +3339,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Flags.copyFMF(*FPOp); // Min/max matching is only viable if all output VTs are the same. - if (is_splat(ValueVTs)) { + if (all_equal(ValueVTs)) { EVT VT = ValueVTs[0]; LLVMContext &Ctx = *DAG.getContext(); auto &TLI = DAG.getTargetLoweringInfo(); @@ -3339,7 +3399,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { break; case SPF_NABS: Negate = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case SPF_ABS: IsUnaryAbs = true; Opc = ISD::ABS; @@ -3375,8 +3435,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { Values[i] = DAG.getNode(OpCode, dl, VT, LHSVal.getValue(LHSVal.getResNo() + i)); if (Negate) - Values[i] = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), - Values[i]); + Values[i] = DAG.getNegative(Values[i], dl, VT); } } else { for (unsigned i = 0; i != NumValues; ++i) { @@ -3537,7 +3596,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), @@ -3547,7 +3606,7 @@ void SelectionDAGBuilder::visitInsertElement(const User &I) { void SelectionDAGBuilder::visitExtractElement(const User &I) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + SDValue InIdx = DAG.getZExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), TLI.getVectorIdxTy(DAG.getDataLayout())); setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()), @@ -3716,7 +3775,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { } // Calculate new mask. 
- SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end()); + SmallVector<int, 8> MappedOps(Mask); for (int &Idx : MappedOps) { if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; @@ -3856,10 +3915,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (IsVectorGEP && !N.getValueType().isVector()) { LLVMContext &Context = *DAG.getContext(); EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount); - if (VectorElementCount.isScalable()) - N = DAG.getSplatVector(VT, dl, N); - else - N = DAG.getSplatBuildVector(VT, dl, N); + N = DAG.getSplat(VT, dl, N); } for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I); @@ -3891,7 +3947,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { DAG.getDataLayout().getTypeAllocSize(GTI.getIndexedType()); // We intentionally mask away the high bits here; ElementSize may not // fit in IdxTy. - APInt ElementMul(IdxSize, ElementSize.getKnownMinSize()); + APInt ElementMul(IdxSize, ElementSize.getKnownMinValue()); bool ElementScalable = ElementSize.isScalable(); // If this is a scalar constant or a splat vector of constants, @@ -3931,10 +3987,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (!IdxN.getValueType().isVector() && IsVectorGEP) { EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorElementCount); - if (VectorElementCount.isScalable()) - IdxN = DAG.getSplatVector(VT, dl, IdxN); - else - IdxN = DAG.getSplatBuildVector(VT, dl, IdxN); + IdxN = DAG.getSplat(VT, dl, IdxN); } // If the index is smaller or larger than intptr_t, truncate or extend @@ -4000,7 +4053,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace()); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), I.getAddressSpace()); if (AllocSize.getValueType() != IntPtr) AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); @@ -4019,7 +4072,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 
Align StackAlign = DAG.getSubtarget().getFrameLowering()->getStackAlign(); if (*Alignment <= StackAlign) - Alignment = None; + Alignment = std::nullopt; const uint64_t StackAlignMask = StackAlign.value() - 1U; // Round the size of the allocation up to the stack alignment size @@ -4068,11 +4121,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SDValue Ptr = getValue(SV); Type *Ty = I.getType(); - Align Alignment = I.getAlign(); - - AAMDNodes AAInfo = I.getAAMetadata(); - const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); - SmallVector<EVT, 4> ValueVTs, MemVTs; SmallVector<uint64_t, 4> Offsets; ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets); @@ -4080,9 +4128,12 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (NumValues == 0) return; + Align Alignment = I.getAlign(); + AAMDNodes AAInfo = I.getAAMetadata(); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); bool isVolatile = I.isVolatile(); MachineMemOperand::Flags MMOFlags = - TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo); SDValue Root; bool ConstantMemory = false; @@ -4100,11 +4151,6 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { Root = DAG.getEntryNode(); ConstantMemory = true; MMOFlags |= MachineMemOperand::MOInvariant; - - // FIXME: pointsToConstantMemory probably does not imply dereferenceable, - // but the previous usage implied it did. Probably should check - // isDereferenceableAndAlignedPointer. - MMOFlags |= MachineMemOperand::MODereferenceable; } else { // Do not serialize non-volatile loads against each other. Root = DAG.getRoot(); @@ -4135,7 +4181,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(Chains.data(), ChainI)); + ArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -4157,7 +4203,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (!ConstantMemory) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(Chains.data(), ChainI)); + ArrayRef(Chains.data(), ChainI)); if (isVolatile) DAG.setRoot(Chain); else @@ -4278,7 +4324,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { // See visitLoad comments. if (ChainI == MaxParallelChains) { SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(Chains.data(), ChainI)); + ArrayRef(Chains.data(), ChainI)); Root = Chain; ChainI = 0; } @@ -4294,7 +4340,8 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { } SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - makeArrayRef(Chains.data(), ChainI)); + ArrayRef(Chains.data(), ChainI)); + setValue(&I, StoreNode); DAG.setRoot(StoreNode); } @@ -4316,7 +4363,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, Src0 = I.getArgOperand(0); Ptr = I.getArgOperand(1); Mask = I.getArgOperand(2); - Alignment = None; + Alignment = std::nullopt; }; Value *PtrOperand, *MaskOperand, *Src0Operand; @@ -4400,17 +4447,17 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy()) return false; + uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType()); + + // Target may not support the required addressing mode. 
+ if (ScaleVal != 1 && + !TLI.isLegalScaleForGatherScatter(ScaleVal, ElemSize)) + return false; + Base = SDB->getValue(BasePtr); Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; - // MGATHER/MSCATTER are only required to support scaling by one or by the - // element size. Other scales may be produced using target-specific DAG - // combines. - uint64_t ScaleVal = DL.getTypeAllocSize(GEP->getResultElementType()); - if (ScaleVal != ElemSize && ScaleVal != 1) - return false; - Scale = DAG.getTargetConstant(ScaleVal, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; @@ -4478,7 +4525,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) { MaybeAlign &Alignment) { // @llvm.masked.expandload.*(Ptr, Mask, Src0) Ptr = I.getArgOperand(0); - Alignment = None; + Alignment = std::nullopt; Mask = I.getArgOperand(1); Src0 = I.getArgOperand(2); }; @@ -4624,6 +4671,12 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break; case AtomicRMWInst::FMax: NT = ISD::ATOMIC_LOAD_FMAX; break; case AtomicRMWInst::FMin: NT = ISD::ATOMIC_LOAD_FMIN; break; + case AtomicRMWInst::UIncWrap: + NT = ISD::ATOMIC_LOAD_UINC_WRAP; + break; + case AtomicRMWInst::UDecWrap: + NT = ISD::ATOMIC_LOAD_UDEC_WRAP; + break; } AtomicOrdering Ordering = I.getOrdering(); SyncScope::ID SSID = I.getSyncScopeID(); @@ -4659,7 +4712,9 @@ void SelectionDAGBuilder::visitFence(const FenceInst &I) { TLI.getFenceOperandTy(DAG.getDataLayout())); Ops[2] = DAG.getTargetConstant(I.getSyncScopeID(), dl, TLI.getFenceOperandTy(DAG.getDataLayout())); - DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); + SDValue N = DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops); + setValue(&I, N); + DAG.setRoot(N); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { @@ -4677,7 +4732,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); - auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout()); + auto Flags = TLI.getLoadMemOperandFlags(I, DAG.getDataLayout(), AC, LibInfo); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(I.getPointerOperand()), Flags, MemVT.getStoreSize(), @@ -4726,7 +4781,8 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); - if (I.getAlign().value() < MemVT.getSizeInBits() / 8) + if (!TLI.supportsUnalignedAtomics() && + I.getAlign().value() < MemVT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); auto Flags = TLI.getStoreMemOperandFlags(I, DAG.getDataLayout()); @@ -4745,13 +4801,14 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { // TODO: Once this is better exercised by tests, it should be merged with // the normal path for stores to prevent future divergence. SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO); + setValue(&I, S); DAG.setRoot(S); return; } SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain, Ptr, Val, MMO); - + setValue(&I, OutChain); DAG.setRoot(OutChain); } @@ -4826,13 +4883,21 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Create the node. SDValue Result; + // In some cases, custom collection of operands from CallInst I may be needed. 
+ TLI.CollectTargetIntrinsicOperands(I, Ops, DAG); if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = - DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT, - MachinePointerInfo(Info.ptrVal, Info.offset), - Info.align, Info.flags, Info.size, - I.getAAMetadata()); + // + // TODO: We currently just fallback to address space 0 if getTgtMemIntrinsic + // didn't yield anything useful. + MachinePointerInfo MPI; + if (Info.ptrVal) + MPI = MachinePointerInfo(Info.ptrVal, Info.offset); + else if (Info.fallbackAddressSpace) + MPI = MachinePointerInfo(*Info.fallbackAddressSpace); + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, + Info.memVT, MPI, Info.align, Info.flags, + Info.size, I.getAAMetadata()); } else if (!HasChain) { Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); } else if (!I.getType()->isVoidTy()) { @@ -5515,17 +5580,20 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( // For VRegs, in instruction referencing mode, create a DBG_INSTR_REF // pointing at the VReg, which will be patched up later. auto &Inst = TII->get(TargetOpcode::DBG_INSTR_REF); - auto MIB = BuildMI(MF, DL, Inst); - MIB.addReg(Reg); - MIB.addImm(0); - MIB.addMetadata(Variable); + SmallVector<MachineOperand, 1> MOs({MachineOperand::CreateReg( + /* Reg */ Reg, /* isDef */ false, /* isImp */ false, + /* isKill */ false, /* isDead */ false, + /* isUndef */ false, /* isEarlyClobber */ false, + /* SubReg */ 0, /* isDebug */ true)}); + auto *NewDIExpr = FragExpr; // We don't have an "Indirect" field in DBG_INSTR_REF, fold that into // the DIExpression. if (Indirect) NewDIExpr = DIExpression::prepend(FragExpr, DIExpression::DerefBefore); - MIB.addMetadata(NewDIExpr); - return MIB; + SmallVector<uint64_t, 2> Ops({dwarf::DW_OP_LLVM_arg, 0}); + NewDIExpr = DIExpression::prependOpcodes(NewDIExpr, Ops); + return BuildMI(MF, DL, Inst, false, MOs, Variable, NewDIExpr); } else { // Create a completely standard DBG_VALUE. auto &Inst = TII->get(TargetOpcode::DBG_VALUE); @@ -5599,7 +5667,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( } bool IsIndirect = false; - Optional<MachineOperand> Op; + std::optional<MachineOperand> Op; // Some arguments' frame index is recorded during argument lowering. int FI = FuncInfo.getArgumentFrameIndex(Arg); if (FI != std::numeric_limits<int>::max()) @@ -5680,7 +5748,7 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( if (VMI != FuncInfo.ValueMap.end()) { const auto &TLI = DAG.getTargetLoweringInfo(); RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second, - V->getType(), None); + V->getType(), std::nullopt); if (RFV.occupiesMultipleRegs()) { splitMultiRegDbgValue(RFV.getRegsAndSizes()); return true; @@ -6026,6 +6094,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, } case Intrinsic::dbg_addr: case Intrinsic::dbg_declare: { + // Debug intrinsics are handled separately in assignment tracking mode. + if (isAssignmentTrackingEnabled(*I.getFunction()->getParent())) + return; // Assume dbg.addr and dbg.declare can not currently use DIArgList, i.e. // they are non-variadic. const auto &DI = cast<DbgVariableIntrinsic>(I); @@ -6125,7 +6196,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.AddDbgLabel(SDV); return; } + case Intrinsic::dbg_assign: { + // Debug intrinsics are handled separately in assignment tracking mode.
+ assert(isAssignmentTrackingEnabled(*I.getFunction()->getParent()) && + "expected assignment tracking to be enabled"); + return; + } case Intrinsic::dbg_value: { + // Debug intrinsics are handled separately in assignment tracking mode. + if (isAssignmentTrackingEnabled(*I.getFunction()->getParent())) + return; const DbgValueInst &DI = cast<DbgValueInst>(I); assert(DI.getVariable() && "Missing variable"); @@ -6140,9 +6220,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; bool IsVariadic = DI.hasArgList(); - if (!handleDebugValue(Values, Variable, Expression, dl, DI.getDebugLoc(), + if (!handleDebugValue(Values, Variable, Expression, DI.getDebugLoc(), SDNodeOrder, IsVariadic)) - addDanglingDebugInfo(&DI, dl, SDNodeOrder); + addDanglingDebugInfo(&DI, SDNodeOrder); return; } @@ -6358,7 +6438,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, // Get the last argument, the metadata and convert it to an integer in the // call Metadata *MD = cast<MetadataAsValue>(I.getArgOperand(1))->getMetadata(); - Optional<RoundingMode> RoundMode = + std::optional<RoundingMode> RoundMode = convertStrToRoundingMode(cast<MDString>(MD)->getString()); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); @@ -6750,8 +6830,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::gcread: case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); - case Intrinsic::flt_rounds: - Res = DAG.getNode(ISD::FLT_ROUNDS_, sdl, {MVT::i32, MVT::Other}, getRoot()); + case Intrinsic::get_rounding: + Res = DAG.getNode(ISD::GET_ROUNDING, sdl, {MVT::i32, MVT::Other}, getRoot()); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return; @@ -6846,7 +6926,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue Result = DAG.getMemIntrinsicNode( ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), Ops, EVT::getIntegerVT(*Context, 8), MachinePointerInfo(I.getArgOperand(0)), - /* align */ None, Flags); + /* align */ std::nullopt, Flags); // Chain the prefetch in parallel with any pending loads, to stay out of // the way of later optimizations.
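The get_active_lane_mask hunk below folds the open-coded scalable/fixed-width splat selection into a single DAG.getSplat call. A minimal sketch of the dispatch that helper is assumed to perform, mirroring the branch the patch removes (the helper name splatScalar is illustrative, not part of the patch):

  #include "llvm/CodeGen/SelectionDAG.h"
  using namespace llvm;

  // Scalable vectors cannot be materialized element-by-element, so they take
  // the SPLAT_VECTOR path; fixed-width vectors build a BUILD_VECTOR of
  // identical operands instead.
  static SDValue splatScalar(SelectionDAG &DAG, const SDLoc &DL, EVT VecTy,
                             SDValue Scalar) {
    return VecTy.isScalableVector() ? DAG.getSplatVector(VecTy, DL, Scalar)
                                    : DAG.getSplatBuildVector(VecTy, DL, Scalar);
  }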
@@ -7178,6 +7258,10 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, DAG.getZExtOrTrunc(Const, sdl, PtrVT))); return; } + case Intrinsic::threadlocal_address: { + setValue(&I, getValue(I.getOperand(0))); + return; + } case Intrinsic::get_active_lane_mask: { EVT CCVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); SDValue Index = getValue(I.getOperand(0)); @@ -7191,14 +7275,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue TripCount = getValue(I.getOperand(1)); auto VecTy = CCVT.changeVectorElementType(ElementVT); - SDValue VectorIndex, VectorTripCount; - if (VecTy.isScalableVector()) { - VectorIndex = DAG.getSplatVector(VecTy, sdl, Index); - VectorTripCount = DAG.getSplatVector(VecTy, sdl, TripCount); - } else { - VectorIndex = DAG.getSplatBuildVector(VecTy, sdl, Index); - VectorTripCount = DAG.getSplatBuildVector(VecTy, sdl, TripCount); - } + SDValue VectorIndex = DAG.getSplat(VecTy, sdl, Index); + SDValue VectorTripCount = DAG.getSplat(VecTy, sdl, TripCount); SDValue VectorStep = DAG.getStepVector(sdl, VecTy); SDValue VectorInduction = DAG.getNode( ISD::UADDSAT, sdl, VecTy, VectorIndex, VectorStep); @@ -7253,11 +7331,6 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( const ConstrainedFPIntrinsic &FPI) { SDLoc sdl = getCurSDLoc(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SmallVector<EVT, 4> ValueVTs; - ComputeValueVTs(TLI, DAG.getDataLayout(), FPI.getType(), ValueVTs); - ValueVTs.push_back(MVT::Other); // Out chain - // We do not need to serialize constrained FP intrinsics against // each other or against (nonvolatile) loads, so they can be // chained like loads. @@ -7286,7 +7359,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( // The only reason why ebIgnore nodes still need to be chained is that // they might depend on the current rounding mode, and therefore must // not be moved across instruction that may change that mode. - LLVM_FALLTHROUGH; + [[fallthrough]]; case fp::ExceptionBehavior::ebMayTrap: // These must not be moved across calls or instructions that may change // floating-point exception masks. @@ -7301,7 +7374,9 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } }; - SDVTList VTs = DAG.getVTList(ValueVTs); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), FPI.getType()); + SDVTList VTs = DAG.getVTList(VT, MVT::Other); fp::ExceptionBehavior EB = *FPI.getExceptionBehavior(); SDNodeFlags Flags; @@ -7323,8 +7398,7 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( Opcode = ISD::STRICT_FMA; // Break fmuladd into fmul and fadd. if (TM.Options.AllowFPOpFusion == FPOpFusion::Strict || - !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), - ValueVTs[0])) { + !TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) { Opers.pop_back(); SDValue Mul = DAG.getNode(ISD::STRICT_FMUL, sdl, VTs, Opers, Flags); pushOutChain(Mul, EB); @@ -7365,8 +7439,18 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic( } static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { - Optional<unsigned> ResOPC; + std::optional<unsigned> ResOPC; switch (VPIntrin.getIntrinsicID()) { + case Intrinsic::vp_ctlz: { + bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne(); + ResOPC = IsZeroUndef ? ISD::VP_CTLZ_ZERO_UNDEF : ISD::VP_CTLZ; + break; + } + case Intrinsic::vp_cttz: { + bool IsZeroUndef = cast<ConstantInt>(VPIntrin.getArgOperand(3))->isOne(); + ResOPC = IsZeroUndef ? 
ISD::VP_CTTZ_ZERO_UNDEF : ISD::VP_CTTZ; + break; + } #define HELPER_MAP_VPID_TO_VPSD(VPID, VPSD) \ case Intrinsic::VPID: \ ResOPC = ISD::VPSD; \ @@ -7388,118 +7472,133 @@ static unsigned getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { return *ResOPC; } -void SelectionDAGBuilder::visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, - SmallVector<SDValue, 7> &OpValues, - bool IsGather) { +void SelectionDAGBuilder::visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues) { SDLoc DL = getCurSDLoc(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); Value *PtrOperand = VPIntrin.getArgOperand(0); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); SDValue LD; bool AddToChain = true; - if (!IsGather) { - // Do not serialize variable-length loads of constant memory with - // anything. - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); - MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); - AddToChain = !AA || !AA->pointsToConstantMemory(ML); - SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); - LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], - MMO, false /*IsExpanding */); - } else { - if (!Alignment) - Alignment = DAG.getEVTAlign(VT.getScalarType()); - unsigned AS = - PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); - SDValue Base, Index, Scale; - ISD::MemIndexType IndexType; - bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, - this, VPIntrin.getParent(), - VT.getScalarStoreSize()); - if (!UniformBase) { - Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); - Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_SCALED; - Scale = - DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); - } - EVT IdxVT = Index.getValueType(); - EVT EltTy = IdxVT.getVectorElementType(); - if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { - EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); - Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); - } - LD = DAG.getGatherVP( - DAG.getVTList(VT, MVT::Other), VT, DL, - {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, - IndexType); - } + // Do not serialize variable-length loads of constant memory with + // anything. + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo); + AddToChain = !AA || !AA->pointsToConstantMemory(ML); + SDValue InChain = AddToChain ? 
DAG.getRoot() : DAG.getEntryNode(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + LD = DAG.getLoadVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], + MMO, false /*IsExpanding */); if (AddToChain) PendingLoads.push_back(LD.getValue(1)); setValue(&VPIntrin, LD); } -void SelectionDAGBuilder::visitVPStoreScatter(const VPIntrinsic &VPIntrin, - SmallVector<SDValue, 7> &OpValues, - bool IsScatter) { +void SelectionDAGBuilder::visitVPGather(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues) { SDLoc DL = getCurSDLoc(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value *PtrOperand = VPIntrin.getArgOperand(0); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range); + SDValue LD; + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); + unsigned AS = + PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOLoad, + MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + SDValue Base, Index, Scale; + ISD::MemIndexType IndexType; + bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, + this, VPIntrin.getParent(), + VT.getScalarStoreSize()); + if (!UniformBase) { + Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(PtrOperand); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + } + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); + } + LD = DAG.getGatherVP( + DAG.getVTList(VT, MVT::Other), VT, DL, + {DAG.getRoot(), Base, Index, Scale, OpValues[1], OpValues[2]}, MMO, + IndexType); + PendingLoads.push_back(LD.getValue(1)); + setValue(&VPIntrin, LD); +} + +void SelectionDAGBuilder::visitVPStore(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues) { + SDLoc DL = getCurSDLoc(); Value *PtrOperand = VPIntrin.getArgOperand(1); EVT VT = OpValues[0].getValueType(); MaybeAlign Alignment = VPIntrin.getPointerAlignment(); AAMDNodes AAInfo = VPIntrin.getAAMetadata(); SDValue ST; - if (!IsScatter) { - if (!Alignment) - Alignment = DAG.getEVTAlign(VT); - SDValue Ptr = OpValues[1]; - SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, AAInfo); - ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, - OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, - /* IsTruncating */ false, /*IsCompressing*/ false); - } else { - if (!Alignment) - Alignment = DAG.getEVTAlign(VT.getScalarType()); - unsigned AS = - PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); - MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( - MachinePointerInfo(AS), MachineMemOperand::MOStore, - MemoryLocation::UnknownSize, *Alignment, AAInfo); - SDValue Base, Index, Scale; - ISD::MemIndexType IndexType; - bool UniformBase = getUniformBase(PtrOperand, Base, 
Index, IndexType, Scale, - this, VPIntrin.getParent(), - VT.getScalarStoreSize()); - if (!UniformBase) { - Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); - Index = getValue(PtrOperand); - IndexType = ISD::SIGNED_SCALED; - Scale = - DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); - } - EVT IdxVT = Index.getValueType(); - EVT EltTy = IdxVT.getVectorElementType(); - if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { - EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); - Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); - } - ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, - {getMemoryRoot(), OpValues[0], Base, Index, Scale, - OpValues[2], OpValues[3]}, - MMO, IndexType); + if (!Alignment) + Alignment = DAG.getEVTAlign(VT); + SDValue Ptr = OpValues[1]; + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(PtrOperand), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, *Alignment, AAInfo); + ST = DAG.getStoreVP(getMemoryRoot(), DL, OpValues[0], Ptr, Offset, + OpValues[2], OpValues[3], VT, MMO, ISD::UNINDEXED, + /* IsTruncating */ false, /*IsCompressing*/ false); + DAG.setRoot(ST); + setValue(&VPIntrin, ST); +} + +void SelectionDAGBuilder::visitVPScatter(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues) { + SDLoc DL = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + Value *PtrOperand = VPIntrin.getArgOperand(1); + EVT VT = OpValues[0].getValueType(); + MaybeAlign Alignment = VPIntrin.getPointerAlignment(); + AAMDNodes AAInfo = VPIntrin.getAAMetadata(); + SDValue ST; + if (!Alignment) + Alignment = DAG.getEVTAlign(VT.getScalarType()); + unsigned AS = + PtrOperand->getType()->getScalarType()->getPointerAddressSpace(); + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + MachinePointerInfo(AS), MachineMemOperand::MOStore, + MemoryLocation::UnknownSize, *Alignment, AAInfo); + SDValue Base, Index, Scale; + ISD::MemIndexType IndexType; + bool UniformBase = getUniformBase(PtrOperand, Base, Index, IndexType, Scale, + this, VPIntrin.getParent(), + VT.getScalarStoreSize()); + if (!UniformBase) { + Base = DAG.getConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(PtrOperand); + IndexType = ISD::SIGNED_SCALED; + Scale = + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())); + } + EVT IdxVT = Index.getValueType(); + EVT EltTy = IdxVT.getVectorElementType(); + if (TLI.shouldExtendGSIndex(IdxVT, EltTy)) { + EVT NewIdxVT = IdxVT.changeVectorElementType(EltTy); + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, NewIdxVT, Index); } + ST = DAG.getScatterVP(DAG.getVTList(MVT::Other), VT, DL, + {getMemoryRoot(), OpValues[0], Base, Index, Scale, + OpValues[2], OpValues[3]}, + MMO, IndexType); DAG.setRoot(ST); setValue(&VPIntrin, ST); } @@ -7626,20 +7725,78 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic( break; } case ISD::VP_LOAD: + visitVPLoad(VPIntrin, ValueVTs[0], OpValues); + break; case ISD::VP_GATHER: - visitVPLoadGather(VPIntrin, ValueVTs[0], OpValues, - Opcode == ISD::VP_GATHER); + visitVPGather(VPIntrin, ValueVTs[0], OpValues); break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: visitVPStridedLoad(VPIntrin, ValueVTs[0], OpValues); break; case ISD::VP_STORE: + visitVPStore(VPIntrin, OpValues); + break; case ISD::VP_SCATTER: - visitVPStoreScatter(VPIntrin, OpValues, Opcode == ISD::VP_SCATTER); + visitVPScatter(VPIntrin, OpValues); break; case 
ISD::EXPERIMENTAL_VP_STRIDED_STORE: visitVPStridedStore(VPIntrin, OpValues); break; + case ISD::VP_FMULADD: { + assert(OpValues.size() == 5 && "Unexpected number of operands"); + SDNodeFlags SDFlags; + if (auto *FPMO = dyn_cast<FPMathOperator>(&VPIntrin)) + SDFlags.copyFMF(*FPMO); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), ValueVTs[0])) { + setValue(&VPIntrin, DAG.getNode(ISD::VP_FMA, DL, VTs, OpValues, SDFlags)); + } else { + SDValue Mul = DAG.getNode( + ISD::VP_FMUL, DL, VTs, + {OpValues[0], OpValues[1], OpValues[3], OpValues[4]}, SDFlags); + SDValue Add = + DAG.getNode(ISD::VP_FADD, DL, VTs, + {Mul, OpValues[2], OpValues[3], OpValues[4]}, SDFlags); + setValue(&VPIntrin, Add); + } + break; + } + case ISD::VP_INTTOPTR: { + SDValue N = OpValues[0]; + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), VPIntrin.getType()); + EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), VPIntrin.getType()); + N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1], + OpValues[2]); + N = DAG.getVPZExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1], + OpValues[2]); + setValue(&VPIntrin, N); + break; + } + case ISD::VP_PTRTOINT: { + SDValue N = OpValues[0]; + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + VPIntrin.getType()); + EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), + VPIntrin.getOperand(0)->getType()); + N = DAG.getVPPtrExtOrTrunc(getCurSDLoc(), PtrMemVT, N, OpValues[1], + OpValues[2]); + N = DAG.getVPZExtOrTrunc(getCurSDLoc(), DestVT, N, OpValues[1], + OpValues[2]); + setValue(&VPIntrin, N); + break; + } + case ISD::VP_ABS: + case ISD::VP_CTLZ: + case ISD::VP_CTLZ_ZERO_UNDEF: + case ISD::VP_CTTZ: + case ISD::VP_CTTZ_ZERO_UNDEF: { + // Pop is_zero_poison operand for vp.ctlz/cttz or + // is_int_min_poison operand for vp.abs. + OpValues.pop_back(); + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + setValue(&VPIntrin, Result); + break; + } } } @@ -7820,6 +7977,17 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, if (TLI.supportSwiftError() && SwiftErrorVal) isTailCall = false; + ConstantInt *CFIType = nullptr; + if (CB.isIndirectCall()) { + if (auto Bundle = CB.getOperandBundle(LLVMContext::OB_kcfi)) { + if (!TLI.supportKCFIBundles()) + report_fatal_error( + "Target doesn't support calls with kcfi operand bundles."); + CFIType = cast<ConstantInt>(Bundle->Inputs[0]); + assert(CFIType->getType()->isIntegerTy(32) && "Invalid CFI type"); + } + } + TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()) .setChain(getRoot()) @@ -7827,7 +7995,8 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee, .setTailCall(isTailCall) .setConvergent(CB.isConvergent()) .setIsPreallocated( - CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0); + CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0) + .setCFIType(CFIType); std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); if (Result.first.getNode()) { @@ -8200,9 +8369,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } - if (Function *F = I.getCalledFunction()) { - diagnoseDontCall(I); + diagnoseDontCall(I); + if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { // Is this an LLVM intrinsic or a target-specific intrinsic?
unsigned IID = F->getIntrinsicID(); @@ -8371,7 +8540,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { assert(!I.hasOperandBundlesOtherThan( {LLVMContext::OB_deopt, LLVMContext::OB_funclet, LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated, - LLVMContext::OB_clang_arc_attachedcall}) && + LLVMContext::OB_clang_arc_attachedcall, LLVMContext::OB_kcfi}) && "Cannot lower calls with arbitrary operand bundles!"); SDValue Callee = getValue(I.getCalledOperand()); @@ -8499,7 +8668,7 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location, /// /// OpInfo describes the operand /// RefOpInfo describes the matching operand if any, the operand otherwise -static llvm::Optional<unsigned> +static std::optional<unsigned> getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, SDISelAsmOperandInfo &OpInfo, SDISelAsmOperandInfo &RefOpInfo) { @@ -8513,7 +8682,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // No work to do for memory/address operands. if (OpInfo.ConstraintType == TargetLowering::C_Memory || OpInfo.ConstraintType == TargetLowering::C_Address) - return None; + return std::nullopt; // If this is a constraint for a single physreg, or a constraint for a // register class, find it. @@ -8523,7 +8692,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, &TRI, RefOpInfo.ConstraintCode, RefOpInfo.ConstraintVT); // RC is unset only on failure. Return immediately. if (!RC) - return None; + return std::nullopt; // Get the actual register value type. This is important, because the user // may have asked for (e.g.) the AX register in i32 type. We need to @@ -8568,7 +8737,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, // No need to allocate a matching input constraint since the constraint it's // matching to has already been allocated. if (OpInfo.isMatchingInputConstraint()) - return None; + return std::nullopt; EVT ValueVT = OpInfo.ConstraintVT; if (OpInfo.ConstraintVT == MVT::Other) @@ -8606,7 +8775,7 @@ getRegistersForValue(SelectionDAG &DAG, const SDLoc &DL, } OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); - return None; + return std::nullopt; } static unsigned @@ -8665,6 +8834,23 @@ public: } // end anonymous namespace +static bool isFunction(SDValue Op) { + if (Op && Op.getOpcode() == ISD::GlobalAddress) { + if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) { + auto Fn = dyn_cast_or_null<Function>(GA->getGlobal()); + + // A normal "call dllimport func" instruction (non-inline-asm) forces + // indirect access through the call opcode it selects, and the asm printer + // usually emits an indirect symbol (e.g. "*") accordingly. Inline asm + // cannot do that today (in fact this is similar to the "Data Access" + // case), so dllimport functions are ignored here. + if (Fn && !Fn->hasDLLImportStorageClass()) + return true; + } + } + return false; +} + /// visitInlineAsm - Handle a call to an InlineAsm object. void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, const BasicBlock *EHPadBB) { @@ -8713,7 +8899,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, // memory and is nonvolatile. SDValue Flag, Chain = (HasSideEffect) ?
getRoot() : DAG.getRoot(); - bool EmitEHLabels = isa<InvokeInst>(Call) && IA->canThrow(); + bool EmitEHLabels = isa<InvokeInst>(Call); if (EmitEHLabels) { assert(EHPadBB && "InvokeInst must have an EHPadBB"); } @@ -8731,8 +8917,15 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, Chain = lowerStartEH(Chain, EHPadBB, BeginLabel); } + int OpNo = -1; + SmallVector<StringRef> AsmStrs; + IA->collectAsmStrs(AsmStrs); + // Second pass over the constraints: compute which constraint option to use. for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) { + if (OpInfo.hasArg() || OpInfo.Type == InlineAsm::isOutput) + OpNo++; + // If this is an output operand with a matching input operand, look up the // matching input. If their types mismatch, e.g. one is an integer, the // other is floating point, or their sizes are different, flag it as an @@ -8750,6 +8943,32 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, OpInfo.ConstraintType == TargetLowering::C_Address) continue; + // In the Linux PIC model, there are 4 cases of value/label addressing: + // + // 1: Function call or label jmp inside the module. + // 2: Data access (such as a global or static variable) inside the module. + // 3: Function call or label jmp outside the module. + // 4: Data access (such as a global variable) outside the module. + // + // Because the current LLVM inline asm architecture is designed not to + // "recognize" the asm text, it is hard to treat memory addressing + // differently for the same value/address used in different instructions. + // For example, in the PIC model a call may go through the PLT or be + // directly PC-relative, while a lea/mov of a function address may use the + // GOT. + // + // Here we try to "recognize" function calls for cases 1 and 3 in inline + // asm and adjust the constraint for them. + // + // TODO: Current inline asm discourages jumping to labels outside the + // module, so jumps to function labels are not handled yet; enhance this + // (especially in the PIC model) if meaningful requirements arise. + if (OpInfo.isIndirect && isFunction(OpInfo.CallOperand) && + TLI.isInlineAsmTargetBranch(AsmStrs, OpNo) && + TM.getCodeModel() != CodeModel::Large) { + OpInfo.isIndirect = false; + OpInfo.ConstraintType = TargetLowering::C_Address; + } + // If this is a memory input, and if the operand is not indirect, do what we // need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory && @@ -8800,7 +9019,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, if (RegError) { const MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); - const char *RegName = TRI.getName(RegError.value()); + const char *RegName = TRI.getName(*RegError); emitInlineAsmError(Call, "register '" + Twine(RegName) + "' allocated for constraint '" + Twine(OpInfo.ConstraintCode) + @@ -8959,8 +9178,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; } - if (OpInfo.ConstraintType == TargetLowering::C_Memory || - OpInfo.ConstraintType == TargetLowering::C_Address) { + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert((OpInfo.isIndirect || OpInfo.ConstraintType != TargetLowering::C_Memory) && "Operand must be indirect to be a mem!"); @@ -8983,6 +9201,37 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, break; } + if (OpInfo.ConstraintType == TargetLowering::C_Address) { + assert(InOperandVal.getValueType() == + TLI.getPointerTy(DAG.getDataLayout()) && + "Address operands expect pointer values"); + + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + + SDValue AsmOp = InOperandVal; + if (isFunction(InOperandVal)) { + auto *GA = cast<GlobalAddressSDNode>(InOperandVal); + ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Func, 1); + AsmOp = DAG.getTargetGlobalAddress(GA->getGlobal(), getCurSDLoc(), + InOperandVal.getValueType(), + GA->getOffset()); + } + + // Add information to the INLINEASM node to know about this input. + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + + AsmNodeOperands.push_back( + DAG.getTargetConstant(ResOpType, getCurSDLoc(), MVT::i32)); + + AsmNodeOperands.push_back(AsmOp); + break; + } + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || OpInfo.ConstraintType == TargetLowering::C_Register) && "Unknown constraint type!"); @@ -9047,7 +9296,7 @@ void SelectionDAGBuilder::visitInlineAsm(const CallBase &Call, if (StructType *StructResult = dyn_cast<StructType>(CallResultType)) ResultTypes = StructResult->elements(); else if (!CallResultType->isVoidTy()) - ResultTypes = makeArrayRef(CallResultType); + ResultTypes = ArrayRef(CallResultType); auto CurResultType = ResultTypes.begin(); auto handleRegAssign = [&](SDValue V) { @@ -9327,12 +9576,11 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); - SDValue Chain, InFlag, Callee, NullPtr; + SDValue Chain, InFlag, Callee; SmallVector<SDValue, 32> Ops; SDLoc DL = getCurSDLoc(); Callee = getValue(CI.getCalledOperand()); - NullPtr = DAG.getIntPtrConstant(0, DL, true); // The stackmap intrinsic only records the live variables (the arguments // passed to it) and emits NOPS (if requested). Unlike the patchpoint @@ -9375,7 +9623,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { Chain = DAG.getNode(ISD::STACKMAP, DL, NodeTys, Ops); InFlag = Chain.getValue(1); - Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); + Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InFlag, DL); // Stackmaps don't generate values, so nothing goes into the NodeMap. 
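The C_Address handling above encodes each operand into an INLINEASM flag word before appending it to AsmNodeOperands: first the operand kind and operand count, then the memory-constraint ID folded in with getFlagWordForMem. A short sketch of that encoding step in isolation, using the InlineAsm helpers named in the hunk (the wrapper function itself is hypothetical):

  #include "llvm/IR/InlineAsm.h"
  using namespace llvm;

  // Build the flag word for a single direct-function operand: Kind_Func with
  // a single following operand, then the memory constraint ID, matching the
  // two-step encoding the C_Address path performs.
  static unsigned buildFuncOperandFlags(unsigned ConstraintID) {
    unsigned Flags = InlineAsm::getFlagWord(InlineAsm::Kind_Func, /*NumOps=*/1);
    return InlineAsm::getFlagWordForMem(Flags, ConstraintID);
  }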
@@ -9693,6 +9941,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Entry.Alignment = Alignment; CLI.getArgs().insert(CLI.getArgs().begin(), Entry); CLI.NumFixedArgs += 1; + CLI.getArgs()[0].IndirectType = CLI.RetTy; CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); // sret demotion isn't compatible with tail-calls, since the sret argument @@ -9897,7 +10146,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ISD::OutputArg MyFlags( Flags, Parts[j].getValueType().getSimpleVT(), VT, i < CLI.NumFixedArgs, i, - j * Parts[j].getValueType().getStoreSize().getKnownMinSize()); + j * Parts[j].getValueType().getStoreSize().getKnownMinValue()); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) { @@ -9985,7 +10234,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { } else { // Collect the legal value parts into potentially illegal values // that correspond to the original function's return values. - Optional<ISD::NodeType> AssertOp; + std::optional<ISD::NodeType> AssertOp; if (CLI.RetSExt) AssertOp = ISD::AssertSext; else if (CLI.RetZExt) @@ -10063,7 +10312,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, // notional registers required by the type. RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, V->getType(), - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. SDValue Chain = DAG.getEntryNode(); if (ExtendType == ISD::ANY_EXTEND) { @@ -10424,8 +10673,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // For scalable vectors, use the minimum size; individual targets // are responsible for handling scalable vector arguments and // return values. - ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, - ArgNo, PartBase+i*RegisterVT.getStoreSize().getKnownMinSize()); + ISD::InputArg MyFlags( + Flags, RegisterVT, VT, isArgValueUsed, ArgNo, + PartBase + i * RegisterVT.getStoreSize().getKnownMinValue()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -10438,7 +10688,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { } if (NeedsRegBlock && Value == NumValues - 1) Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); - PartBase += VT.getStoreSize().getKnownMinSize(); + PartBase += VT.getStoreSize().getKnownMinValue(); } } @@ -10476,7 +10726,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); - Optional<ISD::NodeType> AssertOp = None; + std::optional<ISD::NodeType> AssertOp; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, nullptr, F.getCallingConv(), AssertOp); @@ -10538,7 +10788,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // we do generate a copy for it that can be used on return from the // function. 
if (ArgHasUses || isSwiftErrorArg) { - Optional<ISD::NodeType> AssertOp; + std::optional<ISD::NodeType> AssertOp; if (Arg.hasAttribute(Attribute::SExt)) AssertOp = ISD::AssertSext; else if (Arg.hasAttribute(Attribute::ZExt)) @@ -10561,7 +10811,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) { dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex()); - SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), + SDValue Res = DAG.getMergeValues(ArrayRef(ArgValues.data(), NumValues), SDB->getCurSDLoc()); SDB->setValue(&Arg, Res); @@ -10644,14 +10894,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) { void SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const Instruction *TI = LLVMBB->getTerminator(); SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; // Check PHI nodes in successors that expect a value to be available from this // block. - for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { - const BasicBlock *SuccBB = TI->getSuccessor(succ); + for (const BasicBlock *SuccBB : successors(LLVMBB->getTerminator())) { if (!isa<PHINode>(SuccBB->begin())) continue; MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; @@ -10677,7 +10925,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { unsigned Reg; const Value *PHIOp = PN.getIncomingValueForBlock(LLVMBB); - if (const Constant *C = dyn_cast<Constant>(PHIOp)) { + if (const auto *C = dyn_cast<Constant>(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo.CreateRegs(C); @@ -10708,10 +10956,9 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // the input for this MBB. SmallVector<EVT, 4> ValueVTs; ComputeValueVTs(TLI, DAG.getDataLayout(), PN.getType(), ValueVTs); - for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); - for (unsigned i = 0, e = NumRegisters; i != e; ++i) + for (EVT VT : ValueVTs) { + const unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); + for (unsigned i = 0; i != NumRegisters; ++i) FuncInfo.PHINodesToUpdate.push_back( std::make_pair(&*MBBI++, Reg + i)); Reg += NumRegisters; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index d1915fd4e7ae..bf2111013461 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -18,6 +18,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/SelectionDAGNodes.h" @@ -33,6 +34,7 @@ #include <algorithm> #include <cassert> #include <cstdint> +#include <optional> #include <utility> #include <vector> @@ -42,6 +44,7 @@ class AAResults; class AllocaInst; class AtomicCmpXchgInst; class AtomicRMWInst; +class AssumptionCache; class BasicBlock; class BranchInst; class CallInst; @@ -103,20 +106,68 @@ class SelectionDAGBuilder { /// Helper type for DanglingDebugInfoMap. 
class DanglingDebugInfo { - const DbgValueInst* DI = nullptr; - DebugLoc dl; + using DbgValTy = const DbgValueInst *; + using VarLocTy = const VarLocInfo *; + PointerUnion<DbgValTy, VarLocTy> Info; unsigned SDNodeOrder = 0; public: DanglingDebugInfo() = default; - DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) - : DI(di), dl(std::move(DL)), SDNodeOrder(SDNO) {} - - const DbgValueInst* getDI() { return DI; } - DebugLoc getdl() { return dl; } - unsigned getSDNodeOrder() { return SDNodeOrder; } + DanglingDebugInfo(const DbgValueInst *DI, unsigned SDNO) + : Info(DI), SDNodeOrder(SDNO) {} + DanglingDebugInfo(const VarLocInfo *VarLoc, unsigned SDNO) + : Info(VarLoc), SDNodeOrder(SDNO) {} + + DILocalVariable *getVariable(const FunctionVarLocs *Locs) const { + if (Info.is<VarLocTy>()) + return Locs->getDILocalVariable(Info.get<VarLocTy>()->VariableID); + return Info.get<DbgValTy>()->getVariable(); + } + DIExpression *getExpression() const { + if (Info.is<VarLocTy>()) + return Info.get<VarLocTy>()->Expr; + return Info.get<DbgValTy>()->getExpression(); + } + Value *getVariableLocationOp(unsigned Idx) const { + assert(Idx == 0 && "Dangling variadic debug values not supported yet"); + if (Info.is<VarLocTy>()) + return Info.get<VarLocTy>()->V; + return Info.get<DbgValTy>()->getVariableLocationOp(Idx); + } + DebugLoc getDebugLoc() const { + if (Info.is<VarLocTy>()) + return Info.get<VarLocTy>()->DL; + return Info.get<DbgValTy>()->getDebugLoc(); + } + unsigned getSDNodeOrder() const { return SDNodeOrder; } + + /// Helper for printing DanglingDebugInfo. This hoop-jumping is to + /// accommodate the fact that an argument is required for getVariable. + /// Call SelectionDAGBuilder::printDDI instead of using directly. + struct Print { + Print(const DanglingDebugInfo &DDI, const FunctionVarLocs *VarLocs) + : DDI(DDI), VarLocs(VarLocs) {} + const DanglingDebugInfo &DDI; + const FunctionVarLocs *VarLocs; + friend raw_ostream &operator<<(raw_ostream &OS, + const DanglingDebugInfo::Print &P) { + OS << "DDI(var=" << *P.DDI.getVariable(P.VarLocs) + << ", val= " << *P.DDI.getVariableLocationOp(0) + << ", expr=" << *P.DDI.getExpression() + << ", order=" << P.DDI.getSDNodeOrder() + << ", loc=" << P.DDI.getDebugLoc() << ")"; + return OS; + } + }; }; + /// Returns an object that defines `raw_ostream &operator<<` for printing. + /// Usage example: + //// errs() << printDDI(MyDanglingInfo) << " is dangling\n"; + DanglingDebugInfo::Print printDDI(const DanglingDebugInfo &DDI) { + return DanglingDebugInfo::Print(DDI, DAG.getFunctionVarLocs()); + } + /// Helper type for DanglingDebugInfoMap. typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector; @@ -191,6 +242,7 @@ public: SelectionDAG &DAG; AAResults *AA = nullptr; + AssumptionCache *AC = nullptr; const TargetLibraryInfo *LibInfo; class SDAGSwitchLowering : public SwitchCG::SwitchLowering { @@ -244,7 +296,7 @@ public: SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo), SwiftError(swifterror) {} - void init(GCFunctionInfo *gfi, AAResults *AA, + void init(GCFunctionInfo *gfi, AAResults *AA, AssumptionCache *AC, const TargetLibraryInfo *li); /// Clear out the current SelectionDAG and the associated state and prepare @@ -296,8 +348,8 @@ public: SDValue getCopyFromRegs(const Value *V, Type *Ty); /// Register a dbg_value which relies on a Value which we have not yet seen. 
- void addDanglingDebugInfo(const DbgValueInst *DI, DebugLoc DL, - unsigned Order); + void addDanglingDebugInfo(const DbgValueInst *DI, unsigned Order); + void addDanglingDebugInfo(const VarLocInfo *VarLoc, unsigned Order); /// If we have dangling debug info that describes \p Variable, or an /// overlapping part of variable considering the \p Expr, then this method @@ -317,8 +369,8 @@ public: /// For a given list of Values, attempt to create and record a SDDbgValue in /// the SelectionDAG. bool handleDebugValue(ArrayRef<const Value *> Values, DILocalVariable *Var, - DIExpression *Expr, DebugLoc CurDL, DebugLoc InstDL, - unsigned Order, bool IsVariadic); + DIExpression *Expr, DebugLoc DbgLoc, unsigned Order, + bool IsVariadic); /// Evict any dangling debug information, attempting to salvage it first. void resolveOrClearDbgInfo(); @@ -567,10 +619,14 @@ private: void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); - void visitVPLoadGather(const VPIntrinsic &VPIntrin, EVT VT, - SmallVector<SDValue, 7> &OpValues, bool IsGather); - void visitVPStoreScatter(const VPIntrinsic &VPIntrin, - SmallVector<SDValue, 7> &OpValues, bool IsScatter); + void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues); + void visitVPStore(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues); + void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT, + SmallVector<SDValue, 7> &OpValues); + void visitVPScatter(const VPIntrinsic &VPIntrin, + SmallVector<SDValue, 7> &OpValues); void visitVPStridedLoad(const VPIntrinsic &VPIntrin, EVT VT, SmallVectorImpl<SDValue> &OpValues); void visitVPStridedStore(const VPIntrinsic &VPIntrin, @@ -680,14 +736,14 @@ struct RegsForValue { /// Records if this value needs to be treated in an ABI dependent manner, /// different to normal type legalization.
- Optional<CallingConv::ID> CallConv; + std::optional<CallingConv::ID> CallConv; RegsForValue() = default; RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt, - Optional<CallingConv::ID> CC = None); + std::optional<CallingConv::ID> CC = std::nullopt); RegsForValue(LLVMContext &Context, const TargetLowering &TLI, const DataLayout &DL, unsigned Reg, Type *Ty, - Optional<CallingConv::ID> CC); + std::optional<CallingConv::ID> CC); bool isABIMangled() const { return CallConv.has_value(); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 6ba01664e756..fe4261291fc5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -79,6 +79,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::DELETED_NODE: return "<<Deleted Node!>>"; #endif case ISD::PREFETCH: return "Prefetch"; + case ISD::MEMBARRIER: return "MemBarrier"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; @@ -95,6 +96,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd"; + case ISD::ATOMIC_LOAD_UINC_WRAP: + return "AtomicLoadUIncWrap"; + case ISD::ATOMIC_LOAD_UDEC_WRAP: + return "AtomicLoadUDecWrap"; case ISD::ATOMIC_LOAD: return "AtomicLoad"; case ISD::ATOMIC_STORE: return "AtomicStore"; case ISD::PCMARKER: return "PCMarker"; @@ -422,7 +427,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { return "call_alloc"; // Floating point environment manipulation - case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::GET_ROUNDING: return "get_rounding"; case ISD::SET_ROUNDING: return "set_rounding"; // Bit manipulation @@ -1059,6 +1064,9 @@ LLVM_DUMP_METHOD void SDNode::dumprFull(const SelectionDAG *G) const { void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { printr(OS, G); + // Under VerboseDAGDumping divergence will be printed always. 
+ if (isDivergent() && !VerboseDAGDumping) + OS << " # D:1"; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { if (i) OS << ", "; else OS << " "; printOperand(OS, G, getOperand(i)); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index d46a0a23cca3..902f46115557 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/EHPersonalities.h" @@ -31,6 +32,7 @@ #include "llvm/Analysis/ProfileSummaryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/AssignmentTrackingAnalysis.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -61,6 +63,7 @@ #include "llvm/IR/BasicBlock.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DiagnosticInfo.h" @@ -101,6 +104,7 @@ #include <iterator> #include <limits> #include <memory> +#include <optional> #include <string> #include <utility> #include <vector> @@ -309,7 +313,8 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) +SelectionDAGISel::SelectionDAGISel(char &ID, TargetMachine &tm, + CodeGenOpt::Level OL) : MachineFunctionPass(ID), TM(tm), FuncInfo(new FunctionLoweringInfo()), SwiftError(new SwiftErrorValueTracking()), CurDAG(new SelectionDAG(tm, OL)), @@ -336,9 +341,14 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<GCModuleInfo>(); AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<AssumptionCacheTracker>(); if (UseMBPI && OptLevel != CodeGenOpt::None) AU.addRequired<BranchProbabilityInfoWrapperPass>(); AU.addRequired<ProfileSummaryInfoWrapperPass>(); + // AssignmentTrackingAnalysis only runs if assignment tracking is enabled for + // the module. + AU.addRequired<AssignmentTrackingAnalysis>(); + AU.addPreserved<AssignmentTrackingAnalysis>(); if (OptLevel != CodeGenOpt::None) LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); @@ -382,8 +392,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Decide what flavour of variable location debug-info will be used, before // we change the optimisation level. - UseInstrRefDebugInfo = mf.useDebugInstrRef(); - CurDAG->useInstrRefDebugInfo(UseInstrRefDebugInfo); + bool InstrRef = mf.shouldUseDebugInstrRef(); + mf.setUseDebugInstrRef(InstrRef); // Reset the target options before resetting the optimization // level below. @@ -403,15 +413,21 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn); GFI = Fn.hasGC() ? 
&getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn); + AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(mf.getFunction()); auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); BlockFrequencyInfo *BFI = nullptr; if (PSI && PSI->hasProfileSummary() && OptLevel != CodeGenOpt::None) BFI = &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI(); + FunctionVarLocs const *FnVarLocs = nullptr; + if (isAssignmentTrackingEnabled(*Fn.getParent())) + FnVarLocs = getAnalysis<AssignmentTrackingAnalysis>().getResults(); + LLVM_DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); CurDAG->init(*MF, *ORE, this, LibInfo, - getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI); + getAnalysisIfAvailable<LegacyDivergenceAnalysis>(), PSI, BFI, + FnVarLocs); FuncInfo->set(Fn, *MF, CurDAG); SwiftError->setFunction(*MF); @@ -430,7 +446,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { else AA = nullptr; - SDB->init(GFI, AA, LibInfo); + SDB->init(GFI, AA, AC, LibInfo); MF->setHasInlineAsm(false); @@ -488,7 +504,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { To = J->second; } // Make sure the new register has a sufficiently constrained register class. - if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To)) + if (From.isVirtual() && To.isVirtual()) MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. @@ -530,15 +546,14 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { LiveInMap.insert(LI); // Insert DBG_VALUE instructions for function arguments to the entry block. - bool InstrRef = MF->useDebugInstrRef(); for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e - i - 1]; assert(MI->getOpcode() != TargetOpcode::DBG_VALUE_LIST && "Function parameters should not be described by DBG_VALUE_LIST."); - bool hasFI = MI->getOperand(0).isFI(); + bool hasFI = MI->getDebugOperand(0).isFI(); Register Reg = - hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); - if (Register::isPhysicalRegister(Reg)) + hasFI ? TRI.getFrameRegister(*MF) : MI->getDebugOperand(0).getReg(); + if (Reg.isPhysical()) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); @@ -567,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DebugLoc DL = MI->getDebugLoc(); bool IsIndirect = MI->isIndirectDebugValue(); if (IsIndirect) - assert(MI->getOperand(1).getImm() == 0 && + assert(MI->getDebugOffset().getImm() == 0 && "DBG_VALUE with nonzero offset"); assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && "Expected inlined-at fields to agree"); @@ -608,7 +623,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // For debug-info, in instruction referencing mode, we need to perform some // post-isel maintenance. - if (UseInstrRefDebugInfo) + if (MF->useDebugInstrRef()) MF->finalizeDebugInstrRefs(); // Determine if there are any calls in this machine function. @@ -997,6 +1012,15 @@ public: if (ISelPosition == SelectionDAG::allnodes_iterator(N)) ++ISelPosition; } + + /// NodeInserted - Handle new nodes inserted into the graph: propagate + /// metadata from root nodes that also applies to new nodes, in case the root + /// is later deleted.
+ void NodeInserted(SDNode *N) override { + SDNode *CurNode = &*ISelPosition; + if (MDNode *MD = DAG.getPCSections(CurNode)) + DAG.addPCSections(N, MD); + } }; } // end anonymous namespace @@ -1073,7 +1097,7 @@ void SelectionDAGISel::DoInstructionSelection() { ++ISelPosition; // Make sure that ISelPosition gets properly updated when nodes are deleted - // in calls made from this function. + // in calls made from this function. New nodes inherit relevant metadata. ISelUpdater ISU(*CurDAG, ISelPosition); // The AllNodes list is now topological-sorted. Visit the @@ -1181,11 +1205,11 @@ static void mapWasmLandingPadIndex(MachineBasicBlock *MBB, // In case of single catch (...), we don't emit LSDA, so we don't need // this information. bool IsSingleCatchAllClause = - CPI->getNumArgOperands() == 1 && + CPI->arg_size() == 1 && cast<Constant>(CPI->getArgOperand(0))->isNullValue(); // catchpads for longjmp use an empty type list, e.g. catchpad within %0 [] // and they don't need LSDA info - bool IsCatchLongjmp = CPI->getNumArgOperands() == 0; + bool IsCatchLongjmp = CPI->arg_size() == 0; if (!IsSingleCatchAllClause && !IsCatchLongjmp) { // Create a mapping from landing pad label to landing pad index. bool IntrFound = false; @@ -1279,56 +1303,75 @@ static bool isFoldedOrDeadInstruction(const Instruction *I, !FuncInfo.isExportedInst(I); // Exported instrs must be computed. } +static void processDbgDeclare(FunctionLoweringInfo &FuncInfo, + const Value *Address, DIExpression *Expr, + DILocalVariable *Var, DebugLoc DbgLoc) { + MachineFunction *MF = FuncInfo.MF; + const DataLayout &DL = MF->getDataLayout(); + + assert(Var && "Missing variable"); + assert(DbgLoc && "Missing location"); + + // Look through casts and constant offset GEPs. These mostly come from + // inalloca. + APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0); + Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); + + // Check if the variable is a static alloca or a byval or inalloca + // argument passed in memory. If it is not, then we will ignore this + // intrinsic and handle this during isel like dbg.value. + int FI = std::numeric_limits<int>::max(); + if (const auto *AI = dyn_cast<AllocaInst>(Address)) { + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + FI = SI->second; + } else if (const auto *Arg = dyn_cast<Argument>(Address)) + FI = FuncInfo.getArgumentFrameIndex(Arg); + + if (FI == std::numeric_limits<int>::max()) + return; + + if (Offset.getBoolValue()) + Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, + Offset.getZExtValue()); + + LLVM_DEBUG(dbgs() << "processDbgDeclare: setVariableDbgInfo Var=" << *Var + << ", Expr=" << *Expr << ", FI=" << FI + << ", DbgLoc=" << DbgLoc << "\n"); + MF->setVariableDbgInfo(Var, Expr, FI, DbgLoc); +} + /// Collect llvm.dbg.declare information. This is done after argument lowering /// in case the declarations refer to arguments.
static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) { - MachineFunction *MF = FuncInfo.MF; - const DataLayout &DL = MF->getDataLayout(); for (const BasicBlock &BB : *FuncInfo.Fn) { for (const Instruction &I : BB) { - const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I); - if (!DI) - continue; - - assert(DI->getVariable() && "Missing variable"); - assert(DI->getDebugLoc() && "Missing location"); - const Value *Address = DI->getAddress(); - if (!Address) { - LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI - << " (bad address)\n"); - continue; + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) { + Value *Address = DI->getAddress(); + if (!Address) { + LLVM_DEBUG(dbgs() << "processDbgDeclares skipping " << *DI + << " (bad address)\n"); + continue; + } + processDbgDeclare(FuncInfo, Address, DI->getExpression(), + DI->getVariable(), DI->getDebugLoc()); } - - // Look through casts and constant offset GEPs. These mostly come from - // inalloca. - APInt Offset(DL.getTypeSizeInBits(Address->getType()), 0); - Address = Address->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); - - // Check if the variable is a static alloca or a byval or inalloca - // argument passed in memory. If it is not, then we will ignore this - // intrinsic and handle this during isel like dbg.value. - int FI = std::numeric_limits<int>::max(); - if (const auto *AI = dyn_cast<AllocaInst>(Address)) { - auto SI = FuncInfo.StaticAllocaMap.find(AI); - if (SI != FuncInfo.StaticAllocaMap.end()) - FI = SI->second; - } else if (const auto *Arg = dyn_cast<Argument>(Address)) - FI = FuncInfo.getArgumentFrameIndex(Arg); - - if (FI == std::numeric_limits<int>::max()) - continue; - - DIExpression *Expr = DI->getExpression(); - if (Offset.getBoolValue()) - Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, - Offset.getZExtValue()); - LLVM_DEBUG(dbgs() << "processDbgDeclares: setVariableDbgInfo FI=" << FI - << ", " << *DI << "\n"); - MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc()); } } } +/// Collect single location variable information generated with assignment +/// tracking. This is done after argument lowering in case the declarations +/// refer to arguments. +static void processSingleLocVars(FunctionLoweringInfo &FuncInfo, + FunctionVarLocs const *FnVarLocs) { + for (auto It = FnVarLocs->single_locs_begin(), + End = FnVarLocs->single_locs_end(); + It != End; ++It) + processDbgDeclare(FuncInfo, It->V, It->Expr, + FnVarLocs->getDILocalVariable(It->VariableID), It->DL); +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastISelFailed = false; // Initialize the Fast-ISel state, if needed. @@ -1336,8 +1379,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (TM.Options.EnableFastISel) { LLVM_DEBUG(dbgs() << "Enabling fast-isel\n"); FastIS = TLI->createFastISel(*FuncInfo, LibInfo); - if (FastIS) - FastIS->useInstrRefDebugInfo(UseInstrRefDebugInfo); } ReversePostOrderTraversal<const Function*> RPOT(&Fn); @@ -1391,7 +1432,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (FastIS && Inserted) FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt)); - processDbgDeclares(*FuncInfo); + if (isAssignmentTrackingEnabled(*Fn.getParent())) { + assert(CurDAG->getFunctionVarLocs() && + "expected AssignmentTrackingAnalysis pass results"); + processSingleLocVars(*FuncInfo, CurDAG->getFunctionVarLocs()); + } else { + processDbgDeclares(*FuncInfo); + } // Iterate over all basic blocks in the function. 
StackProtector &SP = getAnalysis<StackProtector>(); @@ -1957,7 +2004,7 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, while (i != e) { unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); - if (!InlineAsm::isMemKind(Flags)) { + if (!InlineAsm::isMemKind(Flags) && !InlineAsm::isFuncKind(Flags)) { // Just skip over this operand, copying the operands verbatim. Ops.insert(Ops.end(), InOps.begin()+i, InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); @@ -1986,7 +2033,9 @@ void SelectionDAGISel::SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, // Add this to the output node. unsigned NewFlags = - InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + InlineAsm::isMemKind(Flags) + ? InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()) + : InlineAsm::getFlagWord(InlineAsm::Kind_Func, SelOps.size()); NewFlags = InlineAsm::getFlagWordForMem(NewFlags, ConstraintID); Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); llvm::append_range(Ops, SelOps); @@ -2193,6 +2242,11 @@ void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) { N->getOperand(0)); } +void SelectionDAGISel::Select_MEMBARRIER(SDNode *N) { + CurDAG->SelectNodeTo(N, TargetOpcode::MEMBARRIER, N->getValueType(0), + N->getOperand(0)); +} + void SelectionDAGISel::pushStackMapLiveVariable(SmallVectorImpl<SDValue> &Ops, SDValue OpVal, SDLoc DL) { SDNode *OpNode = OpVal.getNode(); @@ -2249,7 +2303,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) { // Cache arguments that will be moved to the end in the target node. SDValue Chain = *It++; - Optional<SDValue> Glue; + std::optional<SDValue> Glue; if (It->getValueType() == MVT::Glue) Glue = *It++; SDValue RegMask = *It++; @@ -2287,7 +2341,7 @@ void SelectionDAGISel::Select_PATCHPOINT(SDNode *N) { Ops.push_back(RegMask); Ops.push_back(Chain); if (Glue.has_value()) - Ops.push_back(Glue.value()); + Ops.push_back(*Glue); SDVTList NodeTys = N->getVTList(); CurDAG->SelectNodeTo(N, TargetOpcode::PATCHPOINT, NodeTys, Ops); @@ -2847,6 +2901,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch, case ISD::ARITH_FENCE: Select_ARITH_FENCE(NodeToMatch); return; + case ISD::MEMBARRIER: + Select_MEMBARRIER(NodeToMatch); + return; case ISD::STACKMAP: Select_STACKMAP(NodeToMatch); return; @@ -3764,5 +3821,3 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) { } report_fatal_error(Twine(Msg.str())); } - -char SelectionDAGISel::ID = 0; diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index c5c093ae228f..57bfe344dbab 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -14,8 +14,6 @@ #include "StatepointLowering.h" #include "SelectionDAGBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/None.h" -#include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallBitVector.h" @@ -160,12 +158,12 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType, /// Utility function for reservePreviousStackSlotForValue. Tries to find /// stack slot index to which we have spilled value for previous statepoints. /// LookUpDepth specifies maximum DFS depth this function is allowed to look. 
-static Optional<int> findPreviousSpillSlot(const Value *Val, - SelectionDAGBuilder &Builder, - int LookUpDepth) { +static std::optional<int> findPreviousSpillSlot(const Value *Val, + SelectionDAGBuilder &Builder, + int LookUpDepth) { // Can not look any further - give up now if (LookUpDepth <= 0) - return None; + return std::nullopt; // Spill location is known for gc relocates if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { @@ -173,18 +171,18 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, assert((isa<GCStatepointInst>(Statepoint) || isa<UndefValue>(Statepoint)) && "GetStatepoint must return one of two types"); if (isa<UndefValue>(Statepoint)) - return None; + return std::nullopt; const auto &RelocationMap = Builder.FuncInfo.StatepointRelocationMaps [cast<GCStatepointInst>(Statepoint)]; auto It = RelocationMap.find(Relocate); if (It == RelocationMap.end()) - return None; + return std::nullopt; auto &Record = It->second; if (Record.type != RecordType::Spill) - return None; + return std::nullopt; return Record.payload.FI; } @@ -197,16 +195,16 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // All incoming values should have same known stack slot, otherwise result // is unknown. if (const PHINode *Phi = dyn_cast<PHINode>(Val)) { - Optional<int> MergedResult = None; + std::optional<int> MergedResult; for (const auto &IncomingValue : Phi->incoming_values()) { - Optional<int> SpillSlot = + std::optional<int> SpillSlot = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); if (!SpillSlot) - return None; + return std::nullopt; if (MergedResult && *MergedResult != *SpillSlot) - return None; + return std::nullopt; MergedResult = SpillSlot; } @@ -241,7 +239,7 @@ static Optional<int> findPreviousSpillSlot(const Value *Val, // which we visit values is unspecified. // Don't know any information about this instruction - return None; + return std::nullopt; } /// Return true if-and-only-if the given SDValue can be lowered as either a @@ -284,7 +282,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, return; const int LookUpDepth = 6; - Optional<int> Index = + std::optional<int> Index = findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth); if (!Index) return; @@ -321,7 +319,7 @@ static void reservePreviousStackSlotForValue(const Value *IncomingValue, /// reference lowered call result static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo( SelectionDAGBuilder::StatepointLoweringInfo &SI, - SelectionDAGBuilder &Builder, SmallVectorImpl<SDValue> &PendingExports) { + SelectionDAGBuilder &Builder) { SDValue ReturnValue, CallEndVal; std::tie(ReturnValue, CallEndVal) = Builder.lowerInvokable(SI.CLI, SI.EHPadBB); @@ -526,34 +524,6 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, SelectionDAGBuilder &Builder) { // Lower the deopt and gc arguments for this statepoint. Layout will be: // deopt argument length, deopt arguments.., gc arguments... -#ifndef NDEBUG - if (auto *GFI = Builder.GFI) { - // Check that each of the gc pointer and bases we've gotten out of the - // safepoint is something the strategy thinks might be a pointer (or vector - // of pointers) into the GC heap. This is basically just here to help catch - // errors during statepoint insertion. TODO: This should actually be in the - // Verifier, but we can't get to the GCStrategy from there (yet). 
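As an aside on the findPreviousSpillSlot rewrite above: the PHI case only reports a slot when every incoming value has a known slot and they all agree. A minimal standalone sketch of that merge, using illustrative names rather than the real LLVM types:

#include <optional>
#include <vector>

// Toy model of the PHI handling in findPreviousSpillSlot: the merged result is
// known only if every incoming value has a known slot and all slots match.
std::optional<int> mergeSpillSlots(const std::vector<std::optional<int>> &Incoming) {
  std::optional<int> Merged;                 // starts out "unknown"
  for (const std::optional<int> &Slot : Incoming) {
    if (!Slot)
      return std::nullopt;                   // any unknown input -> unknown result
    if (Merged && *Merged != *Slot)
      return std::nullopt;                   // conflicting slots -> unknown result
    Merged = Slot;
  }
  return Merged;
}

The default-constructed std::optional plays the role that 'Optional<int> MergedResult = None' did before the migration.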
- GCStrategy &S = GFI->getStrategy(); - for (const Value *V : SI.Bases) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt) { - assert(Opt.value() && - "non gc managed base pointer found in statepoint"); - } - } - for (const Value *V : SI.Ptrs) { - auto Opt = S.isGCManagedPointer(V->getType()->getScalarType()); - if (Opt) { - assert(Opt.value() && - "non gc managed derived pointer found in statepoint"); - } - } - assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!"); - } else { - assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!"); - assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!"); - } -#endif // Figure out what lowering strategy we're going to use for each part // Note: Is is conservatively correct to lower both "live-in" and "live-out" @@ -742,7 +712,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( NumOfStatepoints++; // Clear state StatepointLowering.startNewStatepoint(*this); - assert(SI.Bases.size() == SI.Ptrs.size()); + assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!"); + assert((GFI || SI.Bases.empty()) && + "No gc specified, so cannot relocate pointers!"); LLVM_DEBUG(dbgs() << "Lowering statepoint " << *SI.StatepointInstr << "\n"); #ifndef NDEBUG @@ -770,8 +742,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( // Get call node, we will replace it later with statepoint SDValue ReturnVal; SDNode *CallNode; - std::tie(ReturnVal, CallNode) = - lowerCallFromStatepointLoweringInfo(SI, *this, PendingExports); + std::tie(ReturnVal, CallNode) = lowerCallFromStatepointLoweringInfo(SI, *this); // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END // nodes with all the appropriate arguments and return values. @@ -921,7 +892,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT( auto *RetTy = Relocate->getType(); Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), - DAG.getDataLayout(), Reg, RetTy, None); + DAG.getDataLayout(), Reg, RetTy, std::nullopt); SDValue Chain = DAG.getRoot(); RFV.getCopyToRegs(Relocated, DAG, getCurSDLoc(), Chain, nullptr); PendingExports.push_back(Chain); @@ -1148,7 +1119,7 @@ SelectionDAGBuilder::LowerStatepoint(const GCStatepointInst &I, // TODO: To eliminate this problem we can remove gc.result intrinsics // completely and make statepoint call to return a tuple. Type *RetTy = GCResultLocality.second->getType(); - unsigned Reg = FuncInfo.CreateRegs(RetTy); + Register Reg = FuncInfo.CreateRegs(RetTy); RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Reg, RetTy, I.getCallingConv()); @@ -1239,10 +1210,6 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { if (cast<GCStatepointInst>(Statepoint)->getParent() == Relocate.getParent()) StatepointLowering.relocCallVisited(Relocate); - - auto *Ty = Relocate.getType()->getScalarType(); - if (auto IsManaged = GFI->getStrategy().isGCManagedPointer(Ty)) - assert(*IsManaged && "Non gc managed pointer relocated!"); #endif const Value *DerivedPtr = Relocate.getDerivedPtr(); @@ -1266,7 +1233,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { Register InReg = Record.payload.Reg; RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), DAG.getDataLayout(), InReg, Relocate.getType(), - None); // This is not an ABI copy. + std::nullopt); // This is not an ABI copy. 
// We generate copy to/from regs even for local uses, hence we must // chain with current root to ensure proper ordering of copies w.r.t. // statepoint. diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6205e74837c0..8d4c8802f71c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/TargetLowering.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/CodeGenCommonISel.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -260,7 +261,7 @@ bool TargetLowering::findOptimalMemOpLowering( // If the new VT cannot cover all of the remaining bits, then consider // issuing a (or a pair of) unaligned and overlapping load / store. - bool Fast; + unsigned Fast; if (NumMemOps && Op.allowOverlap() && NewVTSize < Size && allowsMisalignedMemoryAccesses( VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1), @@ -351,7 +352,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, break; case ISD::SETO: ShouldInvertCC = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETUO: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : @@ -360,7 +361,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, case ISD::SETONE: // SETONE = O && UNE ShouldInvertCC = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETUEQ: LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : (VT == MVT::f64) ? RTLIB::UO_F64 : @@ -397,7 +398,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, } } - // Use the target specific return value for comparions lib calls. + // Use the target specific return value for comparison lib calls. EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = {NewLHS, NewRHS}; TargetLowering::MakeLibCallOptions CallOptions; @@ -633,35 +634,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, bool AssumeSingleUse) const { EVT VT = Op.getValueType(); - // TODO: We can probably do more work on calculating the known bits and - // simplifying the operations for scalable vectors, but for now we just - // bail out. - if (VT.isScalableVector()) { - // Pretend we don't know anything for now. - Known = KnownBits(DemandedBits.getBitWidth()); - return false; - } - - APInt DemandedElts = VT.isVector() + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth, AssumeSingleUse); } -// TODO: Can we merge SelectionDAG::GetDemandedBits into this? // TODO: Under what circumstances can we create nodes? Constant folding? SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, SelectionDAG &DAG, unsigned Depth) const { EVT VT = Op.getValueType(); - // Pretend we don't know anything about scalable vectors for now. - // TODO: We can probably do more work on simplifying the operations for - // scalable vectors, but for now we just bail out. - if (VT.isScalableVector()) - return SDValue(); - // Limit search depth. 
if (Depth >= SelectionDAG::MaxRecursionDepth) return SDValue(); @@ -680,6 +668,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( KnownBits LHSKnown, RHSKnown; switch (Op.getOpcode()) { case ISD::BITCAST: { + if (VT.isScalableVector()) + return SDValue(); + SDValue Src = peekThroughBitcasts(Op.getOperand(0)); EVT SrcVT = Src.getValueType(); EVT DstVT = Op.getValueType(); @@ -825,6 +816,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( case ISD::ANY_EXTEND_VECTOR_INREG: case ISD::SIGN_EXTEND_VECTOR_INREG: case ISD::ZERO_EXTEND_VECTOR_INREG: { + if (VT.isScalableVector()) + return SDValue(); + // If we only want the lowest element and none of extended bits, then we can // return the bitcasted source vector. SDValue Src = Op.getOperand(0); @@ -838,6 +832,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::INSERT_VECTOR_ELT: { + if (VT.isScalableVector()) + return SDValue(); + // If we don't demand the inserted element, return the base vector. SDValue Vec = Op.getOperand(0); auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); @@ -848,6 +845,9 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::INSERT_SUBVECTOR: { + if (VT.isScalableVector()) + return SDValue(); + SDValue Vec = Op.getOperand(0); SDValue Sub = Op.getOperand(1); uint64_t Idx = Op.getConstantOperandVal(2); @@ -857,6 +857,8 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( if (DemandedSubElts == 0) return Vec; // If this simply widens the lowest subvector, see if we can do it earlier. + // TODO: REMOVE ME - SimplifyMultipleUseDemandedBits shouldn't be creating + // general nodes like this. if (Idx == 0 && Vec.isUndef()) { if (SDValue NewSub = SimplifyMultipleUseDemandedBits( Sub, DemandedBits, DemandedSubElts, DAG, Depth + 1)) @@ -866,6 +868,7 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } case ISD::VECTOR_SHUFFLE: { + assert(!VT.isScalableVector()); ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); // If all the demanded elts are from one operand and are inline, @@ -889,6 +892,11 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } default: + // TODO: Probably okay to remove after audit; here to reduce change size + // in initial enablement patch for scalable vectors + if (VT.isScalableVector()) + return SDValue(); + if (Op.getOpcode() >= ISD::BUILTIN_OP_END) if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode( Op, DemandedBits, DemandedElts, DAG, Depth)) @@ -902,14 +910,10 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG, unsigned Depth) const { EVT VT = Op.getValueType(); - - // Pretend we don't know anything about scalable vectors for now. - // TODO: We can probably do more work on simplifying the operations for - // scalable vectors, but for now we just bail out. - if (VT.isScalableVector()) - return SDValue(); - - APInt DemandedElts = VT.isVector() + // Since the number of lanes in a scalable vector is unknown at compile time, + // we track one bit which is implicitly broadcast to all lanes. This means + // that all lanes in a scalable vector are considered demanded. + APInt DemandedElts = VT.isFixedLengthVector() ? APInt::getAllOnes(VT.getVectorNumElements()) : APInt(1, 1); return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG, @@ -1068,16 +1072,10 @@ bool TargetLowering::SimplifyDemandedBits( // Don't know anything. 
Known = KnownBits(BitWidth); - // TODO: We can probably do more work on calculating the known bits and - // simplifying the operations for scalable vectors, but for now we just - // bail out. EVT VT = Op.getValueType(); - if (VT.isScalableVector()) - return false; - bool IsLE = TLO.DAG.getDataLayout().isLittleEndian(); unsigned NumElts = OriginalDemandedElts.getBitWidth(); - assert((!VT.isVector() || NumElts == VT.getVectorNumElements()) && + assert((!VT.isFixedLengthVector() || NumElts == VT.getVectorNumElements()) && "Unexpected vector size"); APInt DemandedBits = OriginalDemandedBits; @@ -1089,6 +1087,10 @@ bool TargetLowering::SimplifyDemandedBits( if (Op.isUndef()) return false; + // We can't simplify target constants. + if (Op.getOpcode() == ISD::TargetConstant) + return false; + if (Op.getOpcode() == ISD::Constant) { // We know all of the bits for a constant! Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue()); @@ -1103,17 +1105,16 @@ bool TargetLowering::SimplifyDemandedBits( } // Other users may use these bits. - if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) { - if (Depth != 0) { - // If not at the root, Just compute the Known bits to - // simplify things downstream. - Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth); + bool HasMultiUse = false; + if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) { + if (Depth >= SelectionDAG::MaxRecursionDepth) { + // Limit search depth. return false; } - // If this is the root being simplified, allow it to have multiple uses, - // just set the DemandedBits/Elts to all bits. + // Allow multiple uses, just set the DemandedBits/Elts to all bits. DemandedBits = APInt::getAllOnes(BitWidth); DemandedElts = APInt::getAllOnes(NumElts); + HasMultiUse = true; } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) { // Not demanding any bits/elts from Op. return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -1124,9 +1125,9 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known2; switch (Op.getOpcode()) { - case ISD::TargetConstant: - llvm_unreachable("Can't simplify this node"); case ISD::SCALAR_TO_VECTOR: { + if (VT.isScalableVector()) + return false; if (!DemandedElts[0]) return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); @@ -1164,6 +1165,8 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::INSERT_VECTOR_ELT: { + if (VT.isScalableVector()) + return false; SDValue Vec = Op.getOperand(0); SDValue Scl = Op.getOperand(1); auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); @@ -1200,6 +1203,8 @@ bool TargetLowering::SimplifyDemandedBits( return false; } case ISD::INSERT_SUBVECTOR: { + if (VT.isScalableVector()) + return false; // Demand any elements from the subvector and the remainder from the src its // inserted into. SDValue Src = Op.getOperand(0); @@ -1243,6 +1248,8 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::EXTRACT_SUBVECTOR: { + if (VT.isScalableVector()) + return false; // Offset the demanded elts by the subvector index. 
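For fixed-length vectors, the INSERT_SUBVECTOR/EXTRACT_SUBVECTOR cases above are just re-indexings of the demanded-lane mask. A small sketch with plain 64-bit masks (hypothetical helpers, lane count capped at 64 purely for illustration):

#include <cstdint>

// EXTRACT_SUBVECTOR(Src, Idx): demanded result lane i is source lane Idx + i,
// so the source mask is the result mask shifted up by Idx.
uint64_t extractSubvectorSrcDemanded(uint64_t DemandedElts, unsigned Idx) {
  return DemandedElts << Idx;
}

// INSERT_SUBVECTOR(Src, Sub, Idx): lanes in [Idx, Idx + NumSubElts) come from
// Sub; every other demanded lane still comes from Src.
void insertSubvectorDemanded(uint64_t DemandedElts, unsigned Idx,
                             unsigned NumSubElts, uint64_t &DemandedSub,
                             uint64_t &DemandedSrc) {
  uint64_t Window =
      (NumSubElts >= 64 ? ~0ull : ((1ull << NumSubElts) - 1)) << Idx;
  DemandedSub = (DemandedElts & Window) >> Idx;
  DemandedSrc = DemandedElts & ~Window;
}

Scalable vectors cannot be described by such a per-lane mask, which is why these cases now return early for them.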
SDValue Src = Op.getOperand(0); if (Src.getValueType().isScalableVector()) @@ -1268,6 +1275,8 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::CONCAT_VECTORS: { + if (VT.isScalableVector()) + return false; Known.Zero.setAllBits(); Known.One.setAllBits(); EVT SubVT = Op.getOperand(0).getValueType(); @@ -1286,28 +1295,14 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::VECTOR_SHUFFLE: { + assert(!VT.isScalableVector()); ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); // Collect demanded elements from shuffle operands.. - APInt DemandedLHS(NumElts, 0); - APInt DemandedRHS(NumElts, 0); - for (unsigned i = 0; i != NumElts; ++i) { - if (!DemandedElts[i]) - continue; - int M = ShuffleMask[i]; - if (M < 0) { - // For UNDEF elements, we don't know anything about the common state of - // the shuffle result. - DemandedLHS.clearAllBits(); - DemandedRHS.clearAllBits(); - break; - } - assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range"); - if (M < (int)NumElts) - DemandedLHS.setBit(M); - else - DemandedRHS.setBit(M - NumElts); - } + APInt DemandedLHS, DemandedRHS; + if (!getShuffleDemandedElts(NumElts, ShuffleMask, DemandedElts, DemandedLHS, + DemandedRHS)) + break; if (!!DemandedLHS || !!DemandedRHS) { SDValue Op0 = Op.getOperand(0); @@ -1378,7 +1373,7 @@ bool TargetLowering::SimplifyDemandedBits( // AND(INSERT_SUBVECTOR(C,X,I),M) -> INSERT_SUBVECTOR(AND(C,M),X,I) // iff 'C' is Undef/Constant and AND(X,M) == X (for DemandedBits). - if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && + if (Op0.getOpcode() == ISD::INSERT_SUBVECTOR && !VT.isScalableVector() && (Op0.getOperand(0).isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op0.getOperand(0).getNode())) && Op0->hasOneUse()) { @@ -1745,7 +1740,7 @@ bool TargetLowering::SimplifyDemandedBits( // aren't demanded (as above) and that the shifted upper c1 bits of // x aren't demanded. // TODO - support non-uniform vector amounts. - if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL && + if (InnerOp.getOpcode() == ISD::SRL && Op0.hasOneUse() && InnerOp.hasOneUse()) { if (const APInt *SA2 = TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) { @@ -1879,6 +1874,16 @@ bool TargetLowering::SimplifyDemandedBits( Known.One.lshrInPlace(ShAmt); // High bits known zero. Known.Zero.setHighBits(ShAmt); + + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0) { + SDValue NewOp = TLO.DAG.getNode(ISD::SRL, dl, VT, DemandedOp0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } } break; } @@ -2081,10 +2086,10 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); Known = KnownBits::umin(Known0, Known1); - if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1)) - return TLO.CombineTo(Op, IsULE.value() ? Op0 : Op1); - if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1)) - return TLO.CombineTo(Op, IsULT.value() ? Op0 : Op1); + if (std::optional<bool> IsULE = KnownBits::ule(Known0, Known1)) + return TLO.CombineTo(Op, *IsULE ? Op0 : Op1); + if (std::optional<bool> IsULT = KnownBits::ult(Known0, Known1)) + return TLO.CombineTo(Op, *IsULT ? 
Op0 : Op1); break; } case ISD::UMAX: { @@ -2094,10 +2099,10 @@ bool TargetLowering::SimplifyDemandedBits( KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1); KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1); Known = KnownBits::umax(Known0, Known1); - if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) - return TLO.CombineTo(Op, IsUGE.value() ? Op0 : Op1); - if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) - return TLO.CombineTo(Op, IsUGT.value() ? Op0 : Op1); + if (std::optional<bool> IsUGE = KnownBits::uge(Known0, Known1)) + return TLO.CombineTo(Op, *IsUGE ? Op0 : Op1); + if (std::optional<bool> IsUGT = KnownBits::ugt(Known0, Known1)) + return TLO.CombineTo(Op, *IsUGT ? Op0 : Op1); break; } case ISD::BITREVERSE: { @@ -2225,19 +2230,18 @@ bool TargetLowering::SimplifyDemandedBits( if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1)) return true; - Known.Zero = KnownLo.Zero.zext(BitWidth) | - KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth); - - Known.One = KnownLo.One.zext(BitWidth) | - KnownHi.One.zext(BitWidth).shl(HalfBitWidth); + Known = KnownHi.concat(KnownLo); break; } - case ISD::ZERO_EXTEND: - case ISD::ZERO_EXTEND_VECTOR_INREG: { + case ISD::ZERO_EXTEND_VECTOR_INREG: + if (VT.isScalableVector()) + return false; + [[fallthrough]]; + case ISD::ZERO_EXTEND: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned InBits = SrcVT.getScalarSizeInBits(); - unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1; bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG; // If none of the top bits are demanded, convert this into an any_extend. @@ -2269,12 +2273,15 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } - case ISD::SIGN_EXTEND: - case ISD::SIGN_EXTEND_VECTOR_INREG: { + case ISD::SIGN_EXTEND_VECTOR_INREG: + if (VT.isScalableVector()) + return false; + [[fallthrough]]; + case ISD::SIGN_EXTEND: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned InBits = SrcVT.getScalarSizeInBits(); - unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1; bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG; // If none of the top bits are demanded, convert this into an any_extend. @@ -2321,12 +2328,15 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc)); break; } - case ISD::ANY_EXTEND: - case ISD::ANY_EXTEND_VECTOR_INREG: { + case ISD::ANY_EXTEND_VECTOR_INREG: + if (VT.isScalableVector()) + return false; + [[fallthrough]]; + case ISD::ANY_EXTEND: { SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned InBits = SrcVT.getScalarSizeInBits(); - unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; + unsigned InElts = SrcVT.isFixedLengthVector() ? SrcVT.getVectorNumElements() : 1; bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG; // If we only need the bottom element then we can just bitcast. @@ -2369,18 +2379,18 @@ bool TargetLowering::SimplifyDemandedBits( // If the input is only used by this truncate, see if we can shrink it based // on the known demanded bits. 
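The UMIN/UMAX combines above fold to one operand whenever the known bits already decide the unsigned comparison. The underlying range reasoning, sketched with flat 64-bit masks rather than the real KnownBits API:

#include <cstdint>
#include <optional>

struct Known64 {
  uint64_t Zero = 0, One = 0;                  // bits known to be 0 / known to be 1
  uint64_t minValue() const { return One; }    // unknown bits at their lowest
  uint64_t maxValue() const { return ~Zero; }  // unknown bits at their highest
};

// Decide x <= y (unsigned) from known bits when possible; umin(x, y) then
// folds to x (true) or y (false).
std::optional<bool> knownULE(const Known64 &X, const Known64 &Y) {
  if (X.maxValue() <= Y.minValue())
    return true;
  if (X.minValue() > Y.maxValue())
    return false;
  return std::nullopt;
}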
- if (Src.getNode()->hasOneUse()) { - switch (Src.getOpcode()) { - default: + switch (Src.getOpcode()) { + default: + break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. break; - case ISD::SRL: - // Shrink SRL by a constant if none of the high bits shifted in are - // demanded. - if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT)) - // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is - // undesirable. - break; + if (Src.getNode()->hasOneUse()) { const APInt *ShAmtC = TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts); if (!ShAmtC || ShAmtC->uge(BitWidth)) @@ -2402,8 +2412,8 @@ bool TargetLowering::SimplifyDemandedBits( return TLO.CombineTo( Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt)); } - break; } + break; } assert(!Known.hasConflict() && "Bits known to be one AND zero?"); @@ -2420,6 +2430,7 @@ bool TargetLowering::SimplifyDemandedBits( assert(!Known.hasConflict() && "Bits known to be one AND zero?"); Known.Zero |= ~InMask; + Known.One &= (~Known.Zero); break; } case ISD::EXTRACT_VECTOR_ELT: { @@ -2464,6 +2475,8 @@ bool TargetLowering::SimplifyDemandedBits( break; } case ISD::BITCAST: { + if (VT.isScalableVector()) + return false; SDValue Src = Op.getOperand(0); EVT SrcVT = Src.getValueType(); unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits(); @@ -2576,7 +2589,7 @@ bool TargetLowering::SimplifyDemandedBits( SDValue And1 = TLO.DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0), One); return TLO.CombineTo(Op, And1); } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::ADD: case ISD::SUB: { // Add, Sub, and Mul don't demand any bits in positions beyond that @@ -2601,6 +2614,11 @@ bool TargetLowering::SimplifyDemandedBits( return true; } + // neg x with only low bit demanded is simply x. + if (Op.getOpcode() == ISD::SUB && DemandedBits.isOne() && + isa<ConstantSDNode>(Op0) && cast<ConstantSDNode>(Op0)->isZero()) + return TLO.CombineTo(Op, Op1); + // Attempt to avoid multi-use ops if we don't need anything from them. if (!LoMask.isAllOnes() || !DemandedElts.isAllOnes()) { SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( @@ -2679,10 +2697,16 @@ bool TargetLowering::SimplifyDemandedBits( } } - LLVM_FALLTHROUGH; + [[fallthrough]]; } default: - if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { + // We also ask the target about intrinsics (which could be specific to it). + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN) { + // TODO: Probably okay to remove after audit; here to reduce change size + // in initial enablement patch for scalable vectors + if (Op.getValueType().isScalableVector()) + break; if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts, Known, TLO, Depth)) return true; @@ -2715,6 +2739,12 @@ bool TargetLowering::SimplifyDemandedBits( APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT)); } + // A multi use 'all demanded elts' simplify failed to find any knownbits. + // Try again just for the original demanded elts. + // Ensure we do this AFTER constant folding above. 
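The new "neg x with only low bit demanded" fold above works because in two's complement the low bit of -x always equals the low bit of x. A one-line sanity check:

#include <cassert>
#include <cstdint>

// -x == ~x + 1, and the +1 flips bit 0 of ~x back to x's value, so when only
// bit 0 is demanded, (sub 0, x) is simply x.
void checkNegLowBit(uint32_t X) { assert(((0u - X) & 1u) == (X & 1u)); }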
+ if (HasMultiUse && Known.isUnknown() && !OriginalDemandedElts.isAllOnes()) + Known = TLO.DAG.computeKnownBits(Op, OriginalDemandedElts, Depth); + return false; } @@ -2746,7 +2776,7 @@ static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, "Vector binop only"); EVT EltVT = VT.getVectorElementType(); - unsigned NumElts = VT.getVectorNumElements(); + unsigned NumElts = VT.isFixedLengthVector() ? VT.getVectorNumElements() : 1; assert(UndefOp0.getBitWidth() == NumElts && UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis"); @@ -2814,7 +2844,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( } // If Op has other users, assume that all elements are needed. - if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) + if (!AssumeSingleUse && !Op.getNode()->hasOneUse()) DemandedElts.setAllBits(); // Not demanding any elements from Op. @@ -3176,6 +3206,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::VECTOR_SHUFFLE: { + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask(); // Collect demanded elements from shuffle operands.. @@ -3195,17 +3227,17 @@ bool TargetLowering::SimplifyDemandedVectorElts( // See if we can simplify either shuffle operand. APInt UndefLHS, ZeroLHS; APInt UndefRHS, ZeroRHS; - if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS, - ZeroLHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(LHS, DemandedLHS, UndefLHS, ZeroLHS, TLO, + Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS, - ZeroRHS, TLO, Depth + 1)) + if (SimplifyDemandedVectorElts(RHS, DemandedRHS, UndefRHS, ZeroRHS, TLO, + Depth + 1)) return true; // Simplify mask using undef elements from LHS/RHS. bool Updated = false; bool IdentityLHS = true, IdentityRHS = true; - SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end()); + SmallVector<int, 32> NewMask(ShuffleMask); for (unsigned i = 0; i != NumElts; ++i) { int &M = NewMask[i]; if (M < 0) @@ -3223,8 +3255,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( // to Identity which can cause premature removal of the shuffle mask. if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) { SDValue LegalShuffle = - buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1), - NewMask, TLO.DAG); + buildLegalVectorShuffle(VT, DL, LHS, RHS, NewMask, TLO.DAG); if (LegalShuffle) return TLO.CombineTo(Op, LegalShuffle); } @@ -3307,7 +3338,7 @@ bool TargetLowering::SimplifyDemandedVectorElts( Depth + 1, /*AssumeSingleUse*/ true)) return true; } - LLVM_FALLTHROUGH; + [[fallthrough]]; } case ISD::OR: case ISD::XOR: @@ -3367,6 +3398,8 @@ bool TargetLowering::SimplifyDemandedVectorElts( break; } case ISD::MUL: + case ISD::MULHU: + case ISD::MULHS: case ISD::AND: { SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -3375,10 +3408,16 @@ bool TargetLowering::SimplifyDemandedVectorElts( if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO, Depth + 1)) return true; - if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero, + // If we know that a demanded element was zero in Op1 we don't need to + // demand it in Op0 - its guaranteed to be zero. 
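That Op1 observation reduces to a single mask operation, shown here as a sketch over a plain 64-bit lane mask:

#include <cstdint>

// For x * y or x & y, lanes proven zero in Op1 force the result lane to zero,
// so they need not be demanded from Op0.
uint64_t demandedFromOp0(uint64_t DemandedElts, uint64_t KnownZeroInOp1) {
  return DemandedElts & ~KnownZeroInOp1;
}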
+ APInt DemandedElts0 = DemandedElts & ~SrcZero; + if (SimplifyDemandedVectorElts(Op0, DemandedElts0, KnownUndef, KnownZero, TLO, Depth + 1)) return true; + KnownUndef &= DemandedElts0; + KnownZero &= DemandedElts0; + // If every element pair has a zero/undef then just fold to zero. // fold (and x, undef) -> 0 / (and x, 0) -> 0 // fold (mul x, undef) -> 0 / (mul x, 0) -> 0 @@ -3566,6 +3605,19 @@ bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode( return false; } +bool TargetLowering::canCreateUndefOrPoisonForTargetNode( + SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, + bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use canCreateUndefOrPoison if you don't know whether Op" + " is a target node!"); + // Be conservative and return true. + return true; +} + bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, const SelectionDAG &DAG, bool SNaN, @@ -3582,6 +3634,7 @@ bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, bool TargetLowering::isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts, APInt &UndefElts, + const SelectionDAG &DAG, unsigned Depth) const { assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || @@ -3692,6 +3745,26 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT); } + // Try to eliminate a power-of-2 mask constant by converting to a signbit + // test in a narrow type that we can truncate to with no cost. Examples: + // (i32 X & 32768) == 0 --> (trunc X to i16) >= 0 + // (i32 X & 32768) != 0 --> (trunc X to i16) < 0 + // TODO: This conservatively checks for type legality on the source and + // destination types. That may inhibit optimizations, but it also + // allows setcc->shift transforms that may be more beneficial. + auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (AndC && isNullConstant(N1) && AndC->getAPIntValue().isPowerOf2() && + isTypeLegal(OpVT) && N0.hasOneUse()) { + EVT NarrowVT = EVT::getIntegerVT(*DAG.getContext(), + AndC->getAPIntValue().getActiveBits()); + if (isTruncateFree(OpVT, NarrowVT) && isTypeLegal(NarrowVT)) { + SDValue Trunc = DAG.getZExtOrTrunc(N0.getOperand(0), DL, NarrowVT); + SDValue Zero = DAG.getConstant(0, DL, NarrowVT); + return DAG.getSetCC(DL, VT, Trunc, Zero, + Cond == ISD::SETEQ ? ISD::SETGE : ISD::SETLT); + } + } + // Match these patterns in any of their permutations: // (X & Y) == Y // (X & Y) != Y @@ -3968,14 +4041,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, EVT CTVT = CTPOP.getValueType(); SDValue CTOp = CTPOP.getOperand(0); - // If this is a vector CTPOP, keep the CTPOP if it is legal. - // TODO: Should we check if CTPOP is legal(or custom) for scalars? - if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) - return SDValue(); - + // Expand a power-of-2-or-zero comparison based on ctpop: // (ctpop x) u< 2 -> (x & x-1) == 0 // (ctpop x) u> 1 -> (x & x-1) != 0 if (Cond == ISD::SETULT || Cond == ISD::SETUGT) { + // Keep the CTPOP if it is a legal vector op. 
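Both setcc rewrites above are easy to sanity-check with plain integers: the power-of-2 mask test becomes a sign test on a narrower type, and the ctpop comparisons reduce to the classic x & (x - 1) idiom. A scalar sketch (not the DAG code):

#include <cstdint>

// (i32 X & 0x8000) == 0  <=>  (trunc X to i16) >= 0, since bit 15 is exactly
// the sign bit of the narrow type (two's-complement narrowing).
bool highMaskBitClear(uint32_t X) { return (int16_t)X >= 0; }

// (ctpop x) u< 2  ->  (x & (x - 1)) == 0            (zero or a power of two)
bool popcountLessThan2(uint32_t X) { return (X & (X - 1)) == 0; }

// (ctpop x) == 1  ->  (x != 0) && ((x & (x - 1)) == 0)
bool popcountIs1(uint32_t X) { return X != 0 && (X & (X - 1)) == 0; }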
+ if (CTVT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT)) + return SDValue(); + unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond); if (C1.ugt(CostLimit + (Cond == ISD::SETULT))) return SDValue(); @@ -3994,16 +4067,14 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT, return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC); } - // If ctpop is not supported, expand a power-of-2 comparison based on it. + // Expand a power-of-2 comparison based on ctpop: + // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) + // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) { - // For scalars, keep CTPOP if it is legal or custom. - if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT)) + // Keep the CTPOP if it is legal. + if (TLI.isOperationLegal(ISD::CTPOP, CTVT)) return SDValue(); - // This is based on X86's custom lowering for CTPOP which produces more - // instructions than the expansion here. - // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0) - // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0) SDValue Zero = DAG.getConstant(0, dl, CTVT); SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT); assert(CTVT.isInteger()); @@ -4137,6 +4208,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SelectionDAG &DAG = DCI.DAG; const DataLayout &Layout = DAG.getDataLayout(); EVT OpVT = N0.getValueType(); + AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); // Constant fold or commute setcc. if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl)) @@ -4181,6 +4253,23 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG)) return V; + // For equality to 0 of a no-wrap multiply, decompose and test each op: + // X * Y == 0 --> (X == 0) || (Y == 0) + // X * Y != 0 --> (X != 0) && (Y != 0) + // TODO: This bails out if minsize is set, but if the target doesn't have a + // single instruction multiply for this type, it would likely be + // smaller to decompose. + if (C1.isZero() && (Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::MUL && N0.hasOneUse() && + (N0->getFlags().hasNoUnsignedWrap() || + N0->getFlags().hasNoSignedWrap()) && + !Attr.hasFnAttr(Attribute::MinSize)) { + SDValue IsXZero = DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond); + SDValue IsYZero = DAG.getSetCC(dl, VT, N0.getOperand(1), N1, Cond); + unsigned LogicOp = Cond == ISD::SETEQ ? ISD::OR : ISD::AND; + return DAG.getNode(LogicOp, dl, VT, IsXZero, IsYZero); + } + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an // equality comparison, then we're just comparing whether X itself is // zero. @@ -4970,8 +5059,6 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Fold remainder of division by a constant. if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) && N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { - AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); - // When division is cheap or optimizing for minimum size, // fall through to DIVREM creation by skipping this fold. 
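The multiply-equals-zero decomposition above is only sound because the no-wrap flags rule out a nonzero pair whose product wraps to zero. A small illustration of both directions:

#include <cassert>
#include <cstdint>

// With nuw/nsw the product cannot wrap, so it is zero exactly when one operand
// is zero:
bool productIsZeroNoWrap(uint32_t X, uint32_t Y) { return X == 0 || Y == 0; }

// Without the no-wrap guarantee the fold would be wrong in modular arithmetic:
void wrapCounterexample() {
  uint32_t X = 0x10000u, Y = 0x10000u;    // 2^16 * 2^16 wraps to 0 in i32
  assert(X * Y == 0u && X != 0u && Y != 0u);
}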
if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) { @@ -5221,6 +5308,12 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, } } +void TargetLowering::CollectTargetIntrinsicOperands(const CallInst &I, + SmallVectorImpl<SDValue> &Ops, + SelectionDAG &DAG) const { + return; +} + std::pair<unsigned, const TargetRegisterClass *> TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, StringRef Constraint, @@ -5334,11 +5427,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL, OpInfo.CallOperandVal = Call.getArgOperand(ArgNo); break; case InlineAsm::isLabel: - OpInfo.CallOperandVal = - cast<CallBrInst>(&Call)->getBlockAddressForIndirectDest(LabelNo); - OpInfo.ConstraintVT = - getAsmOperandValueType(DL, OpInfo.CallOperandVal->getType()) - .getSimpleVT(); + OpInfo.CallOperandVal = cast<CallBrInst>(&Call)->getIndirectDest(LabelNo); ++LabelNo; continue; case InlineAsm::isClobber: @@ -5944,54 +6033,68 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, return SDValue(); } - bool UseNPQ = false; + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Try to use leading zeros of the dividend to reduce the multiplier and + // avoid expensive fixups. + // TODO: Support vectors. + unsigned LeadingZeros = 0; + if (!VT.isVector() && isa<ConstantSDNode>(N1)) { + assert(!isOneConstant(N1) && "Unexpected divisor"); + LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros(); + // UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in + // the dividend exceeds the leading zeros for the divisor. + LeadingZeros = + std::min(LeadingZeros, + cast<ConstantSDNode>(N1)->getAPIntValue().countLeadingZeros()); + } + + bool UseNPQ = false, UsePreShift = false, UsePostShift = false; SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors; auto BuildUDIVPattern = [&](ConstantSDNode *C) { if (C->isZero()) return false; - // FIXME: We should use a narrower constant when the upper - // bits are known to be zero. const APInt& Divisor = C->getAPIntValue(); - UnsignedDivisionByConstantInfo magics = - UnsignedDivisionByConstantInfo::get(Divisor); - unsigned PreShift = 0, PostShift = 0; - - // If the divisor is even, we can avoid using the expensive fixup by - // shifting the divided value upfront. - if (magics.IsAdd && !Divisor[0]) { - PreShift = Divisor.countTrailingZeros(); - // Get magic number for the shifted divisor. - magics = - UnsignedDivisionByConstantInfo::get(Divisor.lshr(PreShift), PreShift); - assert(!magics.IsAdd && "Should use cheap fixup now"); - } - - unsigned SelNPQ; - if (!magics.IsAdd || Divisor.isOne()) { - assert(magics.ShiftAmount < Divisor.getBitWidth() && - "We shouldn't generate an undefined shift!"); - PostShift = magics.ShiftAmount; - SelNPQ = false; + + SDValue PreShift, MagicFactor, NPQFactor, PostShift; + + // Magic algorithm doesn't work for division by 1. We need to emit a select + // at the end. + if (Divisor.isOne()) { + PreShift = PostShift = DAG.getUNDEF(ShSVT); + MagicFactor = NPQFactor = DAG.getUNDEF(SVT); } else { - PostShift = magics.ShiftAmount - 1; - SelNPQ = true; - } + UnsignedDivisionByConstantInfo magics = + UnsignedDivisionByConstantInfo::get(Divisor, LeadingZeros); - PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT)); - MagicFactors.push_back(DAG.getConstant(magics.Magic, dl, SVT)); - NPQFactors.push_back( - DAG.getConstant(SelNPQ ? 
APInt::getOneBitSet(EltBits, EltBits - 1) - : APInt::getZero(EltBits), - dl, SVT)); - PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT)); - UseNPQ |= SelNPQ; + MagicFactor = DAG.getConstant(magics.Magic, dl, SVT); + + assert(magics.PreShift < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + assert(magics.PostShift < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + assert((!magics.IsAdd || magics.PreShift == 0) && + "Unexpected pre-shift"); + PreShift = DAG.getConstant(magics.PreShift, dl, ShSVT); + PostShift = DAG.getConstant(magics.PostShift, dl, ShSVT); + NPQFactor = DAG.getConstant( + magics.IsAdd ? APInt::getOneBitSet(EltBits, EltBits - 1) + : APInt::getZero(EltBits), + dl, SVT); + UseNPQ |= magics.IsAdd; + UsePreShift |= magics.PreShift != 0; + UsePostShift |= magics.PostShift != 0; + } + + PreShifts.push_back(PreShift); + MagicFactors.push_back(MagicFactor); + NPQFactors.push_back(NPQFactor); + PostShifts.push_back(PostShift); return true; }; - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - // Collect the shifts/magic values from each element. if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern)) return SDValue(); @@ -6018,8 +6121,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, } SDValue Q = N0; - Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift); - Created.push_back(Q.getNode()); + if (UsePreShift) { + Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift); + Created.push_back(Q.getNode()); + } // FIXME: We should support doing a MUL in a wider type. auto GetMULHU = [&](SDValue X, SDValue Y) { @@ -6068,8 +6173,10 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, Created.push_back(Q.getNode()); } - Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); - Created.push_back(Q.getNode()); + if (UsePostShift) { + Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift); + Created.push_back(Q.getNode()); + } EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); @@ -6921,6 +7028,41 @@ SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, OptForSize, Cost, Depth)) return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1)); break; + case ISD::SELECT: + case ISD::VSELECT: { + // fold (fneg (select C, LHS, RHS)) -> (select C, (fneg LHS), (fneg RHS)) + // iff at least one cost is cheaper and the other is neutral/cheaper + SDValue LHS = Op.getOperand(1); + NegatibleCost CostLHS = NegatibleCost::Expensive; + SDValue NegLHS = + getNegatedExpression(LHS, DAG, LegalOps, OptForSize, CostLHS, Depth); + if (!NegLHS || CostLHS > NegatibleCost::Neutral) { + RemoveDeadNode(NegLHS); + break; + } + + // Prevent this node from being deleted by the next call. + Handles.emplace_back(NegLHS); + + SDValue RHS = Op.getOperand(2); + NegatibleCost CostRHS = NegatibleCost::Expensive; + SDValue NegRHS = + getNegatedExpression(RHS, DAG, LegalOps, OptForSize, CostRHS, Depth); + + // We're done with the handles. 
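Returning to the BuildUDIV changes above: the expansion still follows the standard magic-multiply scheme, a MULHU by a precomputed constant, optional pre/post shifts, and an "NPQ" fixup add when the magic constant does not fit the word. A scalar sketch of the fixup path for division by 7, with the constant taken from the usual Hacker's Delight derivation rather than from this patch:

#include <cassert>
#include <cstdint>

// Unsigned n / 7 without a divide: ceil(2^35 / 7) does not fit in 32 bits, so
// use magic = ceil(2^35 / 7) - 2^32 plus the add ("NPQ") fixup.
uint32_t udiv7(uint32_t N) {
  const uint32_t Magic = 0x24924925u;                    // ceil(2^35 / 7) - 2^32
  uint32_t Q = (uint32_t)(((uint64_t)N * Magic) >> 32);  // mulhu(N, Magic)
  uint32_t NPQ = N - Q;                                  // fixup term
  return ((NPQ >> 1) + Q) >> 2;                          // post-shift of 3, split as 1 + 2
}

void checkUdiv7() {
  for (uint32_t N : {0u, 1u, 6u, 7u, 13u, 14u, 1000000u, 0xFFFFFFFFu})
    assert(udiv7(N) == N / 7);
}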
+ Handles.clear(); + + if (!NegRHS || CostRHS > NegatibleCost::Neutral || + (CostLHS != NegatibleCost::Cheaper && + CostRHS != NegatibleCost::Cheaper)) { + RemoveDeadNode(NegLHS); + RemoveDeadNode(NegRHS); + break; + } + + Cost = std::min(CostLHS, CostRHS); + return DAG.getSelect(DL, VT, Op.getOperand(0), NegLHS, NegRHS); + } } return SDValue(); @@ -7002,8 +7144,8 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, } if (!VT.isVector() && Opcode == ISD::MUL && - DAG.ComputeNumSignBits(LHS) > InnerBitSize && - DAG.ComputeNumSignBits(RHS) > InnerBitSize) { + DAG.ComputeMaxSignificantBits(LHS) <= InnerBitSize && + DAG.ComputeMaxSignificantBits(RHS) <= InnerBitSize) { // The input values are both sign-extended. // TODO non-MUL case? if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) { @@ -7014,8 +7156,7 @@ bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, } unsigned ShiftAmount = OuterBitSize - InnerBitSize; - EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout()); - SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy); + SDValue Shift = DAG.getShiftAmountConstant(ShiftAmount, VT, dl); if (!LH.getNode() && !RH.getNode() && isOperationLegalOrCustom(ISD::SRL, VT) && @@ -7122,6 +7263,190 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, return Ok; } +// Optimize unsigned division or remainder by constants for types twice as large +// as a legal VT. +// +// If (1 << (BitWidth / 2)) % Constant == 1, then the remainder +// can be computed +// as: +// Sum += __builtin_uadd_overflow(Lo, High, &Sum); +// Remainder = Sum % Constant +// This is based on "Remainder by Summing Digits" from Hacker's Delight. +// +// For division, we can compute the remainder using the algorithm described +// above, subtract it from the dividend to get an exact multiple of Constant. +// Then multiply that extact multiply by the multiplicative inverse modulo +// (1 << (BitWidth / 2)) to get the quotient. + +// If Constant is even, we can shift right the dividend and the divisor by the +// number of trailing zeros in Constant before applying the remainder algorithm. +// If we're after the quotient, we can subtract this value from the shifted +// dividend and multiply by the multiplicative inverse of the shifted divisor. +// If we want the remainder, we shift the value left by the number of trailing +// zeros and add the bits that were shifted out of the dividend. +bool TargetLowering::expandDIVREMByConstant(SDNode *N, + SmallVectorImpl<SDValue> &Result, + EVT HiLoVT, SelectionDAG &DAG, + SDValue LL, SDValue LH) const { + unsigned Opcode = N->getOpcode(); + EVT VT = N->getValueType(0); + + // TODO: Support signed division/remainder. + if (Opcode == ISD::SREM || Opcode == ISD::SDIV || Opcode == ISD::SDIVREM) + return false; + assert( + (Opcode == ISD::UREM || Opcode == ISD::UDIV || Opcode == ISD::UDIVREM) && + "Unexpected opcode"); + + auto *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!CN) + return false; + + APInt Divisor = CN->getAPIntValue(); + unsigned BitWidth = Divisor.getBitWidth(); + unsigned HBitWidth = BitWidth / 2; + assert(VT.getScalarSizeInBits() == BitWidth && + HiLoVT.getScalarSizeInBits() == HBitWidth && "Unexpected VTs"); + + // Divisor needs to less than (1 << HBitWidth). + APInt HalfMaxPlus1 = APInt::getOneBitSet(BitWidth, HBitWidth); + if (Divisor.uge(HalfMaxPlus1)) + return false; + + // We depend on the UREM by constant optimization in DAGCombiner that requires + // high multiply. 
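A scalar model of the remainder-by-summing-digits step described above, for a 64-bit value split into 32-bit halves. It applies only when (1 << 32) % Divisor == 1, i.e. for divisors of 2^32 - 1 such as 3, 5, 17 or 255, which is exactly the condition the code tests:

#include <cassert>
#include <cstdint>

// (Hi:Lo) % D using only 32-bit arithmetic, valid when 2^32 % D == 1:
// Hi*2^32 + Lo == Hi + Lo (mod D), and a carry out of the 32-bit add is itself
// a 2^32 == 1 (mod D), so it is folded back into the sum (the ADDCARRY above).
uint32_t uremBySummingDigits(uint32_t Lo, uint32_t Hi, uint32_t D) {
  uint32_t Sum = Lo + Hi;
  uint32_t Carry = Sum < Lo;   // did the 32-bit add wrap?
  Sum += Carry;                // cannot wrap again: Sum <= 2^32 - 2 when Carry is 1
  return Sum % D;
}

void checkUrem255() {
  uint64_t X = 0x123456789ABCDEF0ull;
  assert(uremBySummingDigits((uint32_t)X, (uint32_t)(X >> 32), 255) == X % 255);
}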
+ if (!isOperationLegalOrCustom(ISD::MULHU, HiLoVT) && + !isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT)) + return false; + + // Don't expand if optimizing for size. + if (DAG.shouldOptForSize()) + return false; + + // Early out for 0 or 1 divisors. + if (Divisor.ule(1)) + return false; + + // If the divisor is even, shift it until it becomes odd. + unsigned TrailingZeros = 0; + if (!Divisor[0]) { + TrailingZeros = Divisor.countTrailingZeros(); + Divisor.lshrInPlace(TrailingZeros); + } + + SDLoc dl(N); + SDValue Sum; + SDValue PartialRem; + + // If (1 << HBitWidth) % divisor == 1, we can add the two halves together and + // then add in the carry. + // TODO: If we can't split it in half, we might be able to split into 3 or + // more pieces using a smaller bit width. + if (HalfMaxPlus1.urem(Divisor).isOneValue()) { + assert(!LL == !LH && "Expected both input halves or no input halves!"); + if (!LL) { + LL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0), + DAG.getIntPtrConstant(0, dl)); + LH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, N->getOperand(0), + DAG.getIntPtrConstant(1, dl)); + } + + // Shift the input by the number of TrailingZeros in the divisor. The + // shifted out bits will be added to the remainder later. + if (TrailingZeros) { + // Save the shifted off bits if we need the remainder. + if (Opcode != ISD::UDIV) { + APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros); + PartialRem = DAG.getNode(ISD::AND, dl, HiLoVT, LL, + DAG.getConstant(Mask, dl, HiLoVT)); + } + + LL = DAG.getNode( + ISD::OR, dl, HiLoVT, + DAG.getNode(ISD::SRL, dl, HiLoVT, LL, + DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)), + DAG.getNode(ISD::SHL, dl, HiLoVT, LH, + DAG.getShiftAmountConstant(HBitWidth - TrailingZeros, + HiLoVT, dl))); + LH = DAG.getNode(ISD::SRL, dl, HiLoVT, LH, + DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)); + } + + // Use addcarry if we can, otherwise use a compare to detect overflow. + EVT SetCCType = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), HiLoVT); + if (isOperationLegalOrCustom(ISD::ADDCARRY, HiLoVT)) { + SDVTList VTList = DAG.getVTList(HiLoVT, SetCCType); + Sum = DAG.getNode(ISD::UADDO, dl, VTList, LL, LH); + Sum = DAG.getNode(ISD::ADDCARRY, dl, VTList, Sum, + DAG.getConstant(0, dl, HiLoVT), Sum.getValue(1)); + } else { + Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, LL, LH); + SDValue Carry = DAG.getSetCC(dl, SetCCType, Sum, LL, ISD::SETULT); + // If the boolean for the target is 0 or 1, we can add the setcc result + // directly. + if (getBooleanContents(HiLoVT) == + TargetLoweringBase::ZeroOrOneBooleanContent) + Carry = DAG.getZExtOrTrunc(Carry, dl, HiLoVT); + else + Carry = DAG.getSelect(dl, HiLoVT, Carry, DAG.getConstant(1, dl, HiLoVT), + DAG.getConstant(0, dl, HiLoVT)); + Sum = DAG.getNode(ISD::ADD, dl, HiLoVT, Sum, Carry); + } + } + + // If we didn't find a sum, we can't do the expansion. + if (!Sum) + return false; + + // Perform a HiLoVT urem on the Sum using truncated divisor. + SDValue RemL = + DAG.getNode(ISD::UREM, dl, HiLoVT, Sum, + DAG.getConstant(Divisor.trunc(HBitWidth), dl, HiLoVT)); + SDValue RemH = DAG.getConstant(0, dl, HiLoVT); + + if (Opcode != ISD::UREM) { + // Subtract the remainder from the shifted dividend. + SDValue Dividend = DAG.getNode(ISD::BUILD_PAIR, dl, VT, LL, LH); + SDValue Rem = DAG.getNode(ISD::BUILD_PAIR, dl, VT, RemL, RemH); + + Dividend = DAG.getNode(ISD::SUB, dl, VT, Dividend, Rem); + + // Multiply by the multiplicative inverse of the divisor modulo + // (1 << BitWidth). 
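For the quotient, the code subtracts that remainder to leave an exact multiple of the (now odd) divisor and multiplies by the divisor's inverse modulo 2^BitWidth. A scalar sketch, computing the inverse with the usual Newton iteration (an assumption about technique for illustration; the patch itself relies on APInt::multiplicativeInverse):

#include <cassert>
#include <cstdint>

// Inverse of an odd D modulo 2^32; each Newton step doubles the number of
// correct low bits (3 -> 6 -> 12 -> 24 -> 48).
uint32_t inverseMod2_32(uint32_t D) {
  uint32_t Inv = D;              // an odd D is its own inverse modulo 8
  for (int I = 0; I < 4; ++I)
    Inv *= 2 - D * Inv;
  return Inv;                    // D * Inv == 1 (mod 2^32)
}

// Exact unsigned division: X must already be a multiple of the odd D.
uint32_t exactUdiv(uint32_t X, uint32_t D) { return X * inverseMod2_32(D); }

void checkExactUdiv() {
  uint32_t D = 255, Q = 1234567;
  assert(exactUdiv(D * Q, D) == Q);
}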
+ APInt Mod = APInt::getSignedMinValue(BitWidth + 1); + APInt MulFactor = Divisor.zext(BitWidth + 1); + MulFactor = MulFactor.multiplicativeInverse(Mod); + MulFactor = MulFactor.trunc(BitWidth); + + SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend, + DAG.getConstant(MulFactor, dl, VT)); + + // Split the quotient into low and high parts. + SDValue QuotL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient, + DAG.getIntPtrConstant(0, dl)); + SDValue QuotH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HiLoVT, Quotient, + DAG.getIntPtrConstant(1, dl)); + Result.push_back(QuotL); + Result.push_back(QuotH); + } + + if (Opcode != ISD::UDIV) { + // If we shifted the input, shift the remainder left and add the bits we + // shifted off the input. + if (TrailingZeros) { + APInt Mask = APInt::getLowBitsSet(HBitWidth, TrailingZeros); + RemL = DAG.getNode(ISD::SHL, dl, HiLoVT, RemL, + DAG.getShiftAmountConstant(TrailingZeros, HiLoVT, dl)); + RemL = DAG.getNode(ISD::ADD, dl, HiLoVT, RemL, PartialRem); + } + Result.push_back(RemL); + Result.push_back(DAG.getConstant(0, dl, HiLoVT)); + } + + return true; +} + // Check that (every element of) Z is undef or not an exact multiple of BW. static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) { return ISD::matchUnaryPredicate( @@ -7130,8 +7455,68 @@ static bool isNonZeroModBitWidthOrUndef(SDValue Z, unsigned BW) { true); } +static SDValue expandVPFunnelShift(SDNode *Node, SelectionDAG &DAG) { + EVT VT = Node->getValueType(0); + SDValue ShX, ShY; + SDValue ShAmt, InvShAmt; + SDValue X = Node->getOperand(0); + SDValue Y = Node->getOperand(1); + SDValue Z = Node->getOperand(2); + SDValue Mask = Node->getOperand(3); + SDValue VL = Node->getOperand(4); + + unsigned BW = VT.getScalarSizeInBits(); + bool IsFSHL = Node->getOpcode() == ISD::VP_FSHL; + SDLoc DL(SDValue(Node, 0)); + + EVT ShVT = Z.getValueType(); + if (isNonZeroModBitWidthOrUndef(Z, BW)) { + // fshl: X << C | Y >> (BW - C) + // fshr: X << (BW - C) | Y >> C + // where C = Z % BW is not zero + SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT); + ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL); + InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitWidthC, ShAmt, Mask, VL); + ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt, Mask, + VL); + ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, IsFSHL ? 
InvShAmt : ShAmt, Mask, + VL); + } else { + // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW)) + // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW) + SDValue BitMask = DAG.getConstant(BW - 1, DL, ShVT); + if (isPowerOf2_32(BW)) { + // Z % BW -> Z & (BW - 1) + ShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, Z, BitMask, Mask, VL); + // (BW - 1) - (Z % BW) -> ~Z & (BW - 1) + SDValue NotZ = DAG.getNode(ISD::VP_XOR, DL, ShVT, Z, + DAG.getAllOnesConstant(DL, ShVT), Mask, VL); + InvShAmt = DAG.getNode(ISD::VP_AND, DL, ShVT, NotZ, BitMask, Mask, VL); + } else { + SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT); + ShAmt = DAG.getNode(ISD::VP_UREM, DL, ShVT, Z, BitWidthC, Mask, VL); + InvShAmt = DAG.getNode(ISD::VP_SUB, DL, ShVT, BitMask, ShAmt, Mask, VL); + } + + SDValue One = DAG.getConstant(1, DL, ShVT); + if (IsFSHL) { + ShX = DAG.getNode(ISD::VP_SHL, DL, VT, X, ShAmt, Mask, VL); + SDValue ShY1 = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, One, Mask, VL); + ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, ShY1, InvShAmt, Mask, VL); + } else { + SDValue ShX1 = DAG.getNode(ISD::VP_SHL, DL, VT, X, One, Mask, VL); + ShX = DAG.getNode(ISD::VP_SHL, DL, VT, ShX1, InvShAmt, Mask, VL); + ShY = DAG.getNode(ISD::VP_LSHR, DL, VT, Y, ShAmt, Mask, VL); + } + } + return DAG.getNode(ISD::VP_OR, DL, VT, ShX, ShY, Mask, VL); +} + SDValue TargetLowering::expandFunnelShift(SDNode *Node, SelectionDAG &DAG) const { + if (Node->isVPOpcode()) + return expandVPFunnelShift(Node, DAG); + EVT VT = Node->getValueType(0); if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) || @@ -7919,6 +8304,63 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const { DAG.getConstant(Len - 8, dl, ShVT)); } +SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); + SDValue VL = Node->getOperand(2); + unsigned Len = VT.getScalarSizeInBits(); + assert(VT.isInteger() && "VP_CTPOP not implemented for this type."); + + // TODO: Add support for irregular type lengths. + if (!(Len <= 128 && Len % 8 == 0)) + return SDValue(); + + // This is same algorithm of expandCTPOP from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + SDValue Mask55 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT); + SDValue Mask33 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT); + SDValue Mask0F = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT); + + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5; + + // v = v - ((v >> 1) & 0x55555555...) + Tmp1 = DAG.getNode(ISD::VP_AND, dl, VT, + DAG.getNode(ISD::VP_LSHR, dl, VT, Op, + DAG.getConstant(1, dl, ShVT), Mask, VL), + Mask55, Mask, VL); + Op = DAG.getNode(ISD::VP_SUB, dl, VT, Op, Tmp1, Mask, VL); + + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Op, Mask33, Mask, VL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, + DAG.getNode(ISD::VP_LSHR, dl, VT, Op, + DAG.getConstant(2, dl, ShVT), Mask, VL), + Mask33, Mask, VL); + Op = DAG.getNode(ISD::VP_ADD, dl, VT, Tmp2, Tmp3, Mask, VL); + + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(4, dl, ShVT), + Mask, VL), + Tmp5 = DAG.getNode(ISD::VP_ADD, dl, VT, Op, Tmp4, Mask, VL); + Op = DAG.getNode(ISD::VP_AND, dl, VT, Tmp5, Mask0F, Mask, VL); + + if (Len <= 8) + return Op; + + // v = (v * 0x01010101...) 
>> (Len - 8) + SDValue Mask01 = + DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT); + return DAG.getNode(ISD::VP_LSHR, dl, VT, + DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL), + DAG.getConstant(Len - 8, dl, ShVT), Mask, VL); +} + SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); @@ -7969,6 +8411,77 @@ SDValue TargetLowering::expandCTLZ(SDNode *Node, SelectionDAG &DAG) const { return DAG.getNode(ISD::CTPOP, dl, VT, Op); } +SDValue TargetLowering::expandVPCTLZ(SDNode *Node, SelectionDAG &DAG) const { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Op = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); + SDValue VL = Node->getOperand(2); + unsigned NumBitsPerElt = VT.getScalarSizeInBits(); + + // do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + for (unsigned i = 0; (1U << i) < NumBitsPerElt; ++i) { + SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT); + Op = DAG.getNode(ISD::VP_OR, dl, VT, Op, + DAG.getNode(ISD::VP_LSHR, dl, VT, Op, Tmp, Mask, VL), Mask, + VL); + } + Op = DAG.getNode(ISD::VP_XOR, dl, VT, Op, DAG.getConstant(-1, dl, VT), Mask, + VL); + return DAG.getNode(ISD::VP_CTPOP, dl, VT, Op, Mask, VL); +} + +SDValue TargetLowering::CTTZTableLookup(SDNode *Node, SelectionDAG &DAG, + const SDLoc &DL, EVT VT, SDValue Op, + unsigned BitWidth) const { + if (BitWidth != 32 && BitWidth != 64) + return SDValue(); + APInt DeBruijn = BitWidth == 32 ? APInt(32, 0x077CB531U) + : APInt(64, 0x0218A392CD3D5DBFULL); + const DataLayout &TD = DAG.getDataLayout(); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); + unsigned ShiftAmt = BitWidth - Log2_32(BitWidth); + SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op); + SDValue Lookup = DAG.getNode( + ISD::SRL, DL, VT, + DAG.getNode(ISD::MUL, DL, VT, DAG.getNode(ISD::AND, DL, VT, Op, Neg), + DAG.getConstant(DeBruijn, DL, VT)), + DAG.getConstant(ShiftAmt, DL, VT)); + Lookup = DAG.getSExtOrTrunc(Lookup, DL, getPointerTy(TD)); + + SmallVector<uint8_t> Table(BitWidth, 0); + for (unsigned i = 0; i < BitWidth; i++) { + APInt Shl = DeBruijn.shl(i); + APInt Lshr = Shl.lshr(ShiftAmt); + Table[Lshr.getZExtValue()] = i; + } + + // Create a ConstantArray in Constant Pool + auto *CA = ConstantDataArray::get(*DAG.getContext(), Table); + SDValue CPIdx = DAG.getConstantPool(CA, getPointerTy(TD), + TD.getPrefTypeAlign(CA->getType())); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, DAG.getEntryNode(), + DAG.getMemBasePlusOffset(CPIdx, Lookup, DL), + PtrInfo, MVT::i8); + if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF) + return ExtLoad; + + EVT SetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + SDValue Zero = DAG.getConstant(0, DL, VT); + SDValue SrcIsZero = DAG.getSetCC(DL, SetCCVT, Op, Zero, ISD::SETEQ); + return DAG.getSelect(DL, VT, SrcIsZero, + DAG.getConstant(BitWidth, DL, VT), ExtLoad); +} + SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { SDLoc dl(Node); EVT VT = Node->getValueType(0); @@ -8002,6 +8515,12 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { !isOperationLegalOrCustomOrPromote(ISD::XOR, VT))) return SDValue(); + // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal. 
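The new CTTZTableLookup above is the classic De Bruijn trick: isolate the lowest set bit, multiply by a De Bruijn constant, and use the top bits as an index into a 32-entry table. A 32-bit scalar version, built the same way the lowering builds its constant-pool table:

#include <cassert>
#include <cstdint>

// Count trailing zeros of a 32-bit value via a De Bruijn multiply. (x & -x)
// isolates the lowest set bit, so the multiply is just a shift of the De
// Bruijn constant, and each shift leaves a unique pattern in the top 5 bits.
unsigned cttz32(uint32_t X) {
  const uint32_t DeBruijn = 0x077CB531u;        // same constant as the code above
  uint8_t Table[32];
  for (unsigned I = 0; I < 32; ++I)
    Table[(DeBruijn << I) >> 27] = (uint8_t)I;  // slot (DeBruijn << i) >> 27 holds i
  return Table[((X & (0u - X)) * DeBruijn) >> 27];
}

void checkCttz() {
  for (unsigned I = 0; I < 32; ++I)
    assert(cttz32(1u << I) == I);
}

A zero input lands in table slot 0, matching the CTTZ_ZERO_UNDEF behaviour; the non-zero-undef path in the code adds the select against zero afterwards.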
+ if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) && + !isOperationLegal(ISD::CTLZ, VT)) + if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt)) + return V; + // for now, we use: { return popcount(~x & (x - 1)); } // unless the target has ctlz but not ctpop, in which case we use: // { return 32 - nlz(~x & (x-1)); } @@ -8019,6 +8538,22 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const { return DAG.getNode(ISD::CTPOP, dl, VT, Tmp); } +SDValue TargetLowering::expandVPCTTZ(SDNode *Node, SelectionDAG &DAG) const { + SDValue Op = Node->getOperand(0); + SDValue Mask = Node->getOperand(1); + SDValue VL = Node->getOperand(2); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + + // Same as the vector part of expandCTTZ, use: popcount(~x & (x - 1)) + SDValue Not = DAG.getNode(ISD::VP_XOR, dl, VT, Op, + DAG.getConstant(-1, dl, VT), Mask, VL); + SDValue MinusOne = DAG.getNode(ISD::VP_SUB, dl, VT, Op, + DAG.getConstant(1, dl, VT), Mask, VL); + SDValue Tmp = DAG.getNode(ISD::VP_AND, dl, VT, Not, MinusOne, Mask, VL); + return DAG.getNode(ISD::VP_CTPOP, dl, VT, Tmp, Mask, VL); +} + SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative) const { SDLoc dl(N); @@ -8092,36 +8627,36 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); case MVT::i32: Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getConstant(0xFF00, dl, VT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT)); Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, - DAG.getConstant(0xFF0000, dl, VT)); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); case MVT::i64: Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); - Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); - Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getConstant(255ULL<<8, dl, VT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getConstant(255ULL<<16, dl, VT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Op, + DAG.getConstant(255ULL<<24, dl, VT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT)); Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); - Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); - Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); - Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); - Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, - DAG.getConstant(255ULL<<48, dl, VT)); - Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, - DAG.getConstant(255ULL<<40, dl, VT)); - Tmp5 = DAG.getNode(ISD::AND, dl, VT, 
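Editorial note: expandVPCTTZ above relies on the identity cttz(x) == popcount(~x & (x - 1)), and the popcount it feeds is the same 0x55/0x33/0x0F bithack used by expandVPCTPOP earlier in this patch. A minimal scalar model of both pieces, assuming 32-bit elements; ctpop32 and cttz32_popcnt are illustrative helpers, not LLVM functions:

#include <cstdint>

static unsigned ctpop32(uint32_t V) {
  V = V - ((V >> 1) & 0x55555555u);                 // pairwise bit sums
  V = (V & 0x33333333u) + ((V >> 2) & 0x33333333u); // nibble sums
  V = (V + (V >> 4)) & 0x0F0F0F0Fu;                 // byte sums
  return (V * 0x01010101u) >> 24;                   // add bytes, keep the top one
}

static unsigned cttz32_popcnt(uint32_t X) {
  // X - 1 sets every bit below the lowest set bit; ANDing with ~X keeps
  // exactly those bits, so their population count is the trailing-zero count.
  return ctpop32(~X & (X - 1));
}

For X == 0 the masked value is all ones and the result is 32, which matches the defined-at-zero CTTZ semantics.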
Tmp5, - DAG.getConstant(255ULL<<32, dl, VT)); Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, DAG.getConstant(255ULL<<24, dl, VT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, DAG.getConstant(255ULL<<16, dl, VT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, - DAG.getConstant(255ULL<<8 , dl, VT)); + DAG.getConstant(255ULL<<8, dl, VT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); @@ -8132,6 +8667,82 @@ SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const { } } +SDValue TargetLowering::expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const { + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + + if (!VT.isSimple()) + return SDValue(); + + EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT().getScalarType().SimpleTy) { + default: + return SDValue(); + case MVT::i16: + Tmp1 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + return DAG.getNode(ISD::VP_OR, dl, VT, Tmp1, Tmp2, Mask, EVL); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Op, DAG.getConstant(0xFF00, dl, VT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(0xFF00, dl, VT), Mask, EVL); + Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT), + Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL); + return DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::VP_SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT), + Mask, EVL); + Tmp7 = DAG.getNode(ISD::VP_AND, dl, VT, Op, + DAG.getConstant(255ULL << 8, dl, VT), Mask, EVL); + Tmp7 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp7, DAG.getConstant(40, dl, SHVT), + Mask, EVL); + Tmp6 = DAG.getNode(ISD::VP_AND, dl, VT, Op, + DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL); + Tmp6 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp6, DAG.getConstant(24, dl, SHVT), + Mask, EVL); + Tmp5 = DAG.getNode(ISD::VP_AND, dl, VT, Op, + DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL); + Tmp5 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp5, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(8, dl, SHVT), + Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp4, + DAG.getConstant(255ULL << 24, dl, VT), Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(24, dl, SHVT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp3, + DAG.getConstant(255ULL << 16, dl, VT), Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(40, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(255ULL << 8, dl, VT), 
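Editorial note: the reworked scalar BSWAP expansion above masks *before* shifting left (e.g. AND with 0xFF00 then SHL by 8) instead of shifting and then masking with wider constants such as 0xFF0000 or 255ULL<<48, so the immediates stay small and cheaper to materialize; the VP_BSWAP expansion mirrors the same structure with VP nodes. A scalar view of the i32 form, under the assumption of a plain 32-bit integer; bswap32 is an illustrative helper, not part of LLVM:

#include <cstdint>

static uint32_t bswap32(uint32_t Op) {
  uint32_t Tmp4 = Op << 24;               // byte 0 -> byte 3
  uint32_t Tmp3 = (Op & 0xFF00u) << 8;    // byte 1 -> byte 2, mask first
  uint32_t Tmp2 = (Op >> 8) & 0xFF00u;    // byte 2 -> byte 1
  uint32_t Tmp1 = Op >> 24;               // byte 3 -> byte 0
  return (Tmp4 | Tmp3) | (Tmp2 | Tmp1);
}

For 0xAABBCCDD this yields 0xDDCCBBAA, the same result as the old shift-then-mask sequence, just with smaller AND constants.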
Mask, EVL); + Tmp1 = DAG.getNode(ISD::VP_LSHR, dl, VT, Op, DAG.getConstant(56, dl, SHVT), + Mask, EVL); + Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp7, Mask, EVL); + Tmp6 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp6, Tmp5, Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp3, Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp1, Mask, EVL); + Tmp8 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp6, Mask, EVL); + Tmp4 = DAG.getNode(ISD::VP_OR, dl, VT, Tmp4, Tmp2, Mask, EVL); + return DAG.getNode(ISD::VP_OR, dl, VT, Tmp8, Tmp4, Mask, EVL); + } +} + SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const { SDLoc dl(N); EVT VT = N->getValueType(0); @@ -8194,6 +8805,68 @@ SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const { return Tmp; } +SDValue TargetLowering::expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const { + assert(N->getOpcode() == ISD::VP_BITREVERSE); + + SDLoc dl(N); + EVT VT = N->getValueType(0); + SDValue Op = N->getOperand(0); + SDValue Mask = N->getOperand(1); + SDValue EVL = N->getOperand(2); + EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2, Tmp3; + + // If we can, perform BSWAP first and then the mask+swap the i4, then i2 + // and finally the i1 pairs. + // TODO: We can easily support i4/i2 legal types if any target ever does. + if (Sz >= 8 && isPowerOf2_32(Sz)) { + // Create the masks - repeating the pattern every byte. + APInt Mask4 = APInt::getSplat(Sz, APInt(8, 0x0F)); + APInt Mask2 = APInt::getSplat(Sz, APInt(8, 0x33)); + APInt Mask1 = APInt::getSplat(Sz, APInt(8, 0x55)); + + // BSWAP if the type is wider than a single byte. + Tmp = (Sz > 8 ? DAG.getNode(ISD::VP_BSWAP, dl, VT, Op, Mask, EVL) : Op); + + // swap i4: ((V >> 4) & 0x0F) | ((V & 0x0F) << 4) + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(4, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(Mask4, dl, VT), Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask4, dl, VT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT), + Mask, EVL); + Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL); + + // swap i2: ((V >> 2) & 0x33) | ((V & 0x33) << 2) + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(2, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(Mask2, dl, VT), Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask2, dl, VT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT), + Mask, EVL); + Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL); + + // swap i1: ((V >> 1) & 0x55) | ((V & 0x55) << 1) + Tmp2 = DAG.getNode(ISD::VP_LSHR, dl, VT, Tmp, DAG.getConstant(1, dl, SHVT), + Mask, EVL); + Tmp2 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp2, + DAG.getConstant(Mask1, dl, VT), Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_AND, dl, VT, Tmp, DAG.getConstant(Mask1, dl, VT), + Mask, EVL); + Tmp3 = DAG.getNode(ISD::VP_SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT), + Mask, EVL); + Tmp = DAG.getNode(ISD::VP_OR, dl, VT, Tmp2, Tmp3, Mask, EVL); + return Tmp; + } + return SDValue(); +} + std::pair<SDValue, SDValue> TargetLowering::scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const { @@ -8671,7 +9344,7 @@ TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask, } else if (DataVT.isScalableVector()) { Increment = 
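Editorial note: expandVPBITREVERSE above reverses a lane by byte-swapping first and then swapping nibbles, bit pairs, and single bits with the repeating 0x0F/0x33/0x55 masks. A scalar sketch of that order of operations for a 32-bit lane; bitreverse32 is an illustrative name only:

#include <cstdint>

static uint32_t bitreverse32(uint32_t V) {
  // BSWAP first: reverse byte order.
  V = (V << 24) | ((V & 0xFF00u) << 8) | ((V >> 8) & 0xFF00u) | (V >> 24);
  // Then reverse within each byte.
  V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4); // swap nibbles
  V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2); // swap bit pairs
  V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1); // swap single bits
  return V;
}

The expansion bails out (returns SDValue()) for element widths below 8 bits or non-power-of-two widths, where this byte-first strategy does not apply.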
DAG.getVScale(DL, AddrVT, APInt(AddrVT.getFixedSizeInBits(), - DataVT.getStoreSize().getKnownMinSize())); + DataVT.getStoreSize().getKnownMinValue())); } else Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT); @@ -8957,9 +9630,13 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { assert(VT == RHS.getValueType() && "Expected operands to be the same type"); assert(VT.isInteger() && "Expected operands to be integers"); + if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT)) + return DAG.UnrollVectorOp(Node); + // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate. unsigned BW = VT.getScalarSizeInBits(); + EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS); SDValue Orig = DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS); @@ -8968,14 +9645,14 @@ SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const { if (IsSigned) { SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT); SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT); - SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT), - SatMin, SatMax, ISD::SETLT); + SDValue Cond = + DAG.getSetCC(dl, BoolVT, LHS, DAG.getConstant(0, dl, VT), ISD::SETLT); + SatVal = DAG.getSelect(dl, VT, Cond, SatMin, SatMax); } else { SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT); } - Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE); - - return Result; + SDValue Cond = DAG.getSetCC(dl, BoolVT, LHS, Orig, ISD::SETNE); + return DAG.getSelect(dl, VT, Cond, SatVal, Result); } SDValue @@ -9665,7 +10342,7 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, // Store the hi part of CONCAT_VECTORS(V1, V2) SDValue OffsetToV2 = DAG.getVScale( DL, PtrVT, - APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize())); + APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinValue())); SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2); SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo); @@ -9686,9 +10363,10 @@ SDValue TargetLowering::expandVectorSplice(SDNode *Node, DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT); if (TrailingElts > VT.getVectorMinNumElements()) { - SDValue VLBytes = DAG.getVScale( - DL, PtrVT, - APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize())); + SDValue VLBytes = + DAG.getVScale(DL, PtrVT, + APInt(PtrVT.getFixedSizeInBits(), + VT.getStoreSize().getKnownMinValue())); TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes); } @@ -9757,7 +10435,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETUE is expanded, SETOEQ or SETUNE must be legal!"); NeedInvert = true; - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETO: assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) && "If SETO is expanded, SETOEQ must be legal!"); @@ -9781,7 +10459,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, NeedInvert = ((unsigned)CCCode & 0x8U); break; } - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETOEQ: case ISD::SETOGT: case ISD::SETOGE: @@ -9802,7 +10480,7 @@ bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, break; } // Fallthrough if we are unsigned integer. - LLVM_FALLTHROUGH; + [[fallthrough]]; case ISD::SETLE: case ISD::SETGT: case ISD::SETGE: |
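Editorial note: the expandShlSat change above replaces getSelectCC with an explicit setcc in the boolean result type followed by a select, and unrolls vectors when VSELECT is not legal; the overflow test itself is unchanged: shift, shift back, and saturate if the round trip does not reproduce LHS. A scalar sketch of the signed form, assuming a 32-bit type and an in-range shift amount; sshl_sat32 is an illustrative name, not an LLVM entry point:

#include <cstdint>
#include <limits>

static int32_t sshl_sat32(int32_t LHS, unsigned RHS) {
  // Assumes RHS < 32, as the DAG expansion does for legal shift amounts.
  int32_t Result = (int32_t)((uint32_t)LHS << RHS); // ISD::SHL
  int32_t Orig = Result >> RHS;                     // ISD::SRA for the signed variant
  int32_t SatVal = LHS < 0 ? std::numeric_limits<int32_t>::min()
                           : std::numeric_limits<int32_t>::max();
  return LHS != Orig ? SatVal : Result;             // the setcc + select pair
}

The unsigned variant uses a logical right shift for the round trip and saturates to the all-ones maximum instead of selecting between signed min and max.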