author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-24 19:17:23 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-19 21:24:44 +0000 |
commit | ab50317e96e57dee5b3ff4ad3f16f205b2a3359e (patch) | |
tree | 4b1f388eb6a07e574417aaacecd3ec4a83550718 /contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp | |
parent | 412542983a5ba62902141a8a7e155cceb9196a66 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 698 |
1 file changed, 503 insertions, 195 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index cb9ffabc4123..47c6cd6e5487 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -83,9 +83,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - if (Subtarget.isRVE()) - report_fatal_error("Codegen not yet implemented for RVE"); - RISCVABI::ABI ABI = Subtarget.getTargetABI(); assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); @@ -107,6 +104,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, default: report_fatal_error("Don't know how to lower this ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64: @@ -688,7 +687,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, - ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE}; + ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE, + ISD::EXPERIMENTAL_VP_SPLICE}; static const unsigned IntegerVecReduceOps[] = { ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, @@ -928,7 +928,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, - ISD::VP_FNEARBYINT, ISD::VP_SETCC}; + ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM, + ISD::VP_FMAXIMUM}; // Sets common operation actions on RVV floating-point vector types. const auto SetCommonVFPActions = [&](MVT VT) { @@ -1374,8 +1375,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, - ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, - ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); + ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, + ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -2593,11 +2594,12 @@ static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - // If we know the exact VLEN, our VL is exactly equal to VLMAX, and - // we can't encode the AVL as an immediate, use the VLMAX encoding. + // If we know the exact VLEN, and our VL is exactly equal to VLMAX, + // canonicalize the representation. InsertVSETVLI will pick the immediate + // encoding later if profitable. 
const auto [MinVLMAX, MaxVLMAX] = RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); - if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31) + if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX) return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT()); @@ -4040,19 +4042,23 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isNullConstant(Scalar.getOperand(1))) { SDValue ExtractedVal = Scalar.getOperand(0); - MVT ExtractedVT = ExtractedVal.getSimpleValueType(); - MVT ExtractedContainerVT = ExtractedVT; - if (ExtractedContainerVT.isFixedLengthVector()) { - ExtractedContainerVT = getContainerForFixedLengthVector( - DAG, ExtractedContainerVT, Subtarget); - ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal, - DAG, Subtarget); - } - if (ExtractedContainerVT.bitsLE(VT)) - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal, + // The element types must be the same. + if (ExtractedVal.getValueType().getVectorElementType() == + VT.getVectorElementType()) { + MVT ExtractedVT = ExtractedVal.getSimpleValueType(); + MVT ExtractedContainerVT = ExtractedVT; + if (ExtractedContainerVT.isFixedLengthVector()) { + ExtractedContainerVT = getContainerForFixedLengthVector( + DAG, ExtractedContainerVT, Subtarget); + ExtractedVal = convertToScalableVector(ExtractedContainerVT, + ExtractedVal, DAG, Subtarget); + } + if (ExtractedContainerVT.bitsLE(VT)) + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, + ExtractedVal, DAG.getConstant(0, DL, XLenVT)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, DAG.getConstant(0, DL, XLenVT)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, - DAG.getConstant(0, DL, XLenVT)); + } } @@ -4646,6 +4652,85 @@ static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, return DAG.getBitcast(VT, Rotate); } +// If compiling with an exactly known VLEN, see if we can split a +// shuffle on m2 or larger into a small number of m1 sized shuffles +// which write each destination registers exactly once. +static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDLoc DL(SVN); + MVT VT = SVN->getSimpleValueType(0); + SDValue V1 = SVN->getOperand(0); + SDValue V2 = SVN->getOperand(1); + ArrayRef<int> Mask = SVN->getMask(); + unsigned NumElts = VT.getVectorNumElements(); + + // If we don't know exact data layout, not much we can do. If this + // is already m1 or smaller, no point in splitting further. + const unsigned MinVLen = Subtarget.getRealMinVLen(); + const unsigned MaxVLen = Subtarget.getRealMaxVLen(); + if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen) + return SDValue(); + + MVT ElemVT = VT.getVectorElementType(); + unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); + unsigned VRegsPerSrc = NumElts / ElemsPerVReg; + + SmallVector<std::pair<int, SmallVector<int>>> + OutMasks(VRegsPerSrc, {-1, {}}); + + // Check if our mask can be done as a 1-to-1 mapping from source + // to destination registers in the group without needing to + // write each destination more than once. 
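The comment above describes the 1-to-1 register-mapping check performed by lowerShuffleViaVRegSplitting. A minimal standalone C++ sketch of that per-register index math follows, using an invented v8i32 mask and a single source operand for brevity; it is an illustration, not the code from the commit:

```cpp
#include <cstdio>
#include <vector>

// Standalone illustration (not the commit's code): with an exactly known
// 128-bit VLEN, a v8i32 shuffle spans two vector registers of four elements
// each. Each destination register must be fed by exactly one source register
// for the split to apply; the per-register submask is the source index
// modulo the register size.
int main() {
  const unsigned ElemsPerVReg = 4;            // 128-bit VLEN / 32-bit elements
  const std::vector<int> Mask = {4, 5, 6, 7,  // dst reg 0 reads src reg 1
                                 0, 1, 2, 3}; // dst reg 1 reads src reg 0
  const unsigned NumVRegs =
      static_cast<unsigned>(Mask.size()) / ElemsPerVReg;

  for (unsigned Dst = 0; Dst < NumVRegs; ++Dst) {
    int SrcVReg = -1;
    bool OneToOne = true;
    std::vector<int> SubMask(ElemsPerVReg, -1);
    for (unsigned I = 0; I < ElemsPerVReg; ++I) {
      int SrcIdx = Mask[Dst * ElemsPerVReg + I];
      if (SrcIdx < 0)
        continue;                             // undef lane: no constraint
      int Reg = SrcIdx / (int)ElemsPerVReg;
      if (SrcVReg == -1)
        SrcVReg = Reg;
      if (Reg != SrcVReg) {                   // needs two sources: give up
        OneToOne = false;
        break;
      }
      SubMask[I] = SrcIdx % (int)ElemsPerVReg; // index within that register
    }
    if (!OneToOne) {
      std::printf("dst vreg %u: not a 1-to-1 register mapping\n", Dst);
      continue;
    }
    std::printf("dst vreg %u <- src vreg %d, submask:", Dst, SrcVReg);
    for (int M : SubMask)
      std::printf(" %d", M);
    std::printf("\n");
  }
  return 0;
}
```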
+ for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) { + int DstVecIdx = DstIdx / ElemsPerVReg; + int DstSubIdx = DstIdx % ElemsPerVReg; + int SrcIdx = Mask[DstIdx]; + if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts) + continue; + int SrcVecIdx = SrcIdx / ElemsPerVReg; + int SrcSubIdx = SrcIdx % ElemsPerVReg; + if (OutMasks[DstVecIdx].first == -1) + OutMasks[DstVecIdx].first = SrcVecIdx; + if (OutMasks[DstVecIdx].first != SrcVecIdx) + // Note: This case could easily be handled by keeping track of a chain + // of source values and generating two element shuffles below. This is + // less an implementation question, and more a profitability one. + return SDValue(); + + OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1); + OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx; + } + + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); + MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); + assert(M1VT == getLMUL1VT(M1VT)); + unsigned NumOpElts = M1VT.getVectorMinNumElements(); + SDValue Vec = DAG.getUNDEF(ContainerVT); + // The following semantically builds up a fixed length concat_vector + // of the component shuffle_vectors. We eagerly lower to scalable here + // to avoid DAG combining it back to a large shuffle_vector again. + V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); + V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); + for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) { + auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx]; + if (SrcVecIdx == -1) + continue; + unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts; + SDValue SrcVec = (unsigned)SrcVecIdx > VRegsPerSrc ? V2 : V1; + SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec, + DAG.getVectorIdxConstant(ExtractIdx, DL)); + SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget); + SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask); + SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget); + unsigned InsertIdx = DstVecIdx * NumOpElts; + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec, + DAG.getVectorIdxConstant(InsertIdx, DL)); + } + return convertFromScalableVector(VT, Vec, DAG, Subtarget); +} + static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { SDValue V1 = Op.getOperand(0); @@ -4753,6 +4838,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, } } + // For exact VLEN m2 or greater, try to split to m1 operations if we + // can split cleanly. + if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget)) + return V; + ArrayRef<int> Mask = SVN->getMask(); if (SDValue V = @@ -4846,54 +4936,28 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); - SmallVector<SDValue> MaskVals; - // As a backup, shuffles can be lowered via a vrgather instruction, possibly - // merged with a second vrgather. - SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; - // By default we preserve the original operand order, and use a mask to // select LHS as true and RHS as false. However, since RVV vector selects may // feature splats but only on the LHS, we may choose to invert our mask and // instead select between RHS and LHS. 
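A small standalone sketch of the select-style case described in the comment above, with an invented four-element mask: when the splat operand is V2, the operands are swapped and the lane mask is inverted, exactly as the code that follows does.

```cpp
#include <cstdio>
#include <vector>

// Standalone illustration of the select-style shuffle: every lane keeps its
// position and merely chooses between V1 and V2. If V2 is the splat operand,
// the operands are swapped and the lane mask inverted so the splat ends up
// on the side the vselect handles best. Mask values are invented.
int main() {
  const int NumElts = 4;
  const std::vector<int> Mask = {0, 5, 2, 7}; // lane i from V1 (i) or V2 (i+4)
  const bool SwapOps = true;                  // pretend V2 is a splat, V1 is not

  std::printf("vselect lane mask (1 selects the first operand):");
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < NumElts) ^ SwapOps;
    std::printf(" %d", SelectMaskVal ? 1 : 0);
  }
  std::printf("\n");                          // prints 0 1 0 1 with SwapOps
  return 0;
}
```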
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); - bool InvertMask = IsSelect == SwapOps; - - // Keep a track of which non-undef indices are used by each LHS/RHS shuffle - // half. - DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; - // Now construct the mask that will be used by the vselect or blended - // vrgather operation. For vrgathers, construct the appropriate indices into - // each vector. - for (int MaskIndex : Mask) { - bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask; - MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); - if (!IsSelect) { - bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; - GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 - ? DAG.getConstant(MaskIndex, DL, XLenVT) - : DAG.getUNDEF(XLenVT)); - GatherIndicesRHS.push_back( - IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) - : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); - if (IsLHSOrUndefIndex && MaskIndex >= 0) - ++LHSIndexCounts[MaskIndex]; - if (!IsLHSOrUndefIndex) - ++RHSIndexCounts[MaskIndex - NumElts]; + if (IsSelect) { + // Now construct the mask that will be used by the vselect operation. + SmallVector<SDValue> MaskVals; + for (int MaskIndex : Mask) { + bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps; + MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); } - } - if (SwapOps) { - std::swap(V1, V2); - std::swap(GatherIndicesLHS, GatherIndicesRHS); - } - - assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); - MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); - SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); + if (SwapOps) + std::swap(V1, V2); - if (IsSelect) + assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); + MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); + } // We might be able to express the shuffle as a bitrotate. But even if we // don't have Zvkb and have to expand, the expanded sequence of approx. 2 @@ -4909,6 +4973,43 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, return SDValue(); } + // As a backup, shuffles can be lowered via a vrgather instruction, possibly + // merged with a second vrgather. + SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; + + // Keep a track of which non-undef indices are used by each LHS/RHS shuffle + // half. + DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; + + SmallVector<SDValue> MaskVals; + + // Now construct the mask that will be used by the blended vrgather operation. + // Cconstruct the appropriate indices into each vector. + for (int MaskIndex : Mask) { + bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps; + MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); + bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; + GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 + ? DAG.getConstant(MaskIndex, DL, XLenVT) + : DAG.getUNDEF(XLenVT)); + GatherIndicesRHS.push_back( + IsLHSOrUndefIndex ? 
DAG.getUNDEF(XLenVT) + : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); + if (IsLHSOrUndefIndex && MaskIndex >= 0) + ++LHSIndexCounts[MaskIndex]; + if (!IsLHSOrUndefIndex) + ++RHSIndexCounts[MaskIndex - NumElts]; + } + + if (SwapOps) { + std::swap(V1, V2); + std::swap(GatherIndicesLHS, GatherIndicesRHS); + } + + assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); + MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); + unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; MVT IndexVT = VT.changeTypeToInteger(); @@ -4932,56 +5033,60 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, MVT IndexContainerVT = ContainerVT.changeVectorElementType(IndexVT.getScalarType()); - SDValue Gather; - // TODO: This doesn't trigger for i64 vectors on RV32, since there we - // encounter a bitcasted BUILD_VECTOR with low/high i32 values. - if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { - Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, - Subtarget); - } else { + // Base case for the recursion just below - handle the worst case + // single source permutation. Note that all the splat variants + // are handled above. + if (V2.isUndef()) { V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); - // If only one index is used, we can use a "splat" vrgather. - // TODO: We can splat the most-common index and fix-up any stragglers, if - // that's beneficial. - if (LHSIndexCounts.size() == 1) { - int SplatIndex = LHSIndexCounts.begin()->getFirst(); - Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, - DAG.getConstant(SplatIndex, DL, XLenVT), - DAG.getUNDEF(ContainerVT), TrueMask, VL); - } else { - SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); - LHSIndices = - convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); - - Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, - DAG.getUNDEF(ContainerVT), TrueMask, VL); + SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); + LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG, + Subtarget); + SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, + DAG.getUNDEF(ContainerVT), TrueMask, VL); + return convertFromScalableVector(VT, Gather, DAG, Subtarget); + } + + // Translate the gather index we computed above (and possibly swapped) + // back to a shuffle mask. This step should disappear once we complete + // the migration to recursive design. + SmallVector<int> ShuffleMaskLHS; + ShuffleMaskLHS.reserve(GatherIndicesLHS.size()); + for (SDValue GatherIndex : GatherIndicesLHS) { + if (GatherIndex.isUndef()) { + ShuffleMaskLHS.push_back(-1); + continue; } + auto *IdxC = cast<ConstantSDNode>(GatherIndex); + ShuffleMaskLHS.push_back(IdxC->getZExtValue()); } - // If a second vector operand is used by this shuffle, blend it in with an - // additional vrgather. - if (!V2.isUndef()) { - V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); + // Recursively invoke lowering for the LHS as if there were no RHS. + // This allows us to leverage all of our single source permute tricks. 
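A standalone sketch of the decomposition described above, with an invented mask: the two-source shuffle becomes a V1-only shuffle (so the single-source permute lowerings can run on it) followed by a masked vrgather that blends in the V2 elements.

```cpp
#include <cstdio>
#include <vector>

// Standalone sketch of the two-source decomposition: V2 lanes are left
// undef in the V1-only mask, and their per-lane V2 gather indices are
// recorded for the follow-up masked vrgather.
int main() {
  const int NumElts = 4;
  const std::vector<int> Mask = {6, 1, 0, 5}; // indices >= 4 refer to V2

  std::vector<int> ShuffleMaskLHS, GatherIdxRHS;
  for (int M : Mask) {
    bool FromV1 = M >= 0 && M < NumElts;
    ShuffleMaskLHS.push_back(FromV1 ? M : -1);         // step 1: V1-only shuffle
    GatherIdxRHS.push_back(FromV1 ? -1 : M - NumElts); // step 2: blend from V2
  }

  std::printf("V1-only shuffle mask:");
  for (int M : ShuffleMaskLHS)
    std::printf(" %d", M);
  std::printf("\nV2 gather indices:  ");
  for (int M : GatherIdxRHS)
    std::printf(" %d", M);
  std::printf("\n");
  return 0;
}
```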
+ SDValue Gather = + DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS); + Gather = convertToScalableVector(ContainerVT, Gather, DAG, Subtarget); - MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); - SelectMask = - convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); + // Blend in second vector source with an additional vrgather. + V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); - // If only one index is used, we can use a "splat" vrgather. - // TODO: We can splat the most-common index and fix-up any stragglers, if - // that's beneficial. - if (RHSIndexCounts.size() == 1) { - int SplatIndex = RHSIndexCounts.begin()->getFirst(); - Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, - DAG.getConstant(SplatIndex, DL, XLenVT), Gather, - SelectMask, VL); - } else { - SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); - RHSIndices = - convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); - Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, - SelectMask, VL); - } + MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); + SelectMask = + convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); + + // If only one index is used, we can use a "splat" vrgather. + // TODO: We can splat the most-common index and fix-up any stragglers, if + // that's beneficial. + if (RHSIndexCounts.size() == 1) { + int SplatIndex = RHSIndexCounts.begin()->getFirst(); + Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, + DAG.getConstant(SplatIndex, DL, XLenVT), Gather, + SelectMask, VL); + } else { + SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); + RHSIndices = + convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); + Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, + SelectMask, VL); } return convertFromScalableVector(VT, Gather, DAG, Subtarget); @@ -5401,7 +5506,16 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget); } - auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + SDValue Mask, VL; + if (Op->isVPOpcode()) { + Mask = Op.getOperand(2); + if (VT.isFixedLengthVector()) + Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, + Subtarget); + VL = Op.getOperand(3); + } else { + std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + } SDValue NewY = Y; if (!XIsNeverNan) { @@ -5422,7 +5536,9 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, } unsigned Opc = - Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL; + Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM + ? 
RISCVISD::VFMAX_VL + : RISCVISD::VFMIN_VL; SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY, DAG.getUNDEF(ContainerVT), Mask, VL); if (VT.isFixedLengthVector()) @@ -6651,6 +6767,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, !Subtarget.hasVInstructionsF16())) return SplitVPOp(Op, DAG); return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + case ISD::VP_FMAXIMUM: + case ISD::VP_FMINIMUM: + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVPOp(Op, DAG); + return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); case ISD::EXPERIMENTAL_VP_SPLICE: return lowerVPSpliceExperimental(Op, DAG); case ISD::EXPERIMENTAL_VP_REVERSE: @@ -6859,6 +6982,23 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, return LowerCallTo(CLI).first; } +SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const GlobalValue *GV = N->getGlobal(); + + // Use a PC-relative addressing mode to access the global dynamic GOT address. + // This generates the pattern (PseudoLA_TLSDESC sym), which expands to + // + // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol) + // lw tY, tX, %tlsdesc_lo_load(label) // R_RISCV_TLSDESC_LOAD_LO12_I(label) + // addi a0, tX, %tlsdesc_lo_add(label) // R_RISCV_TLSDESC_ADD_LO12_I(label) + // jalr t0, tY // R_RISCV_TLSDESC_CALL(label) + SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); + return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0); +} + SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); @@ -6883,7 +7023,8 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, break; case TLSModel::LocalDynamic: case TLSModel::GeneralDynamic: - Addr = getDynamicTLSAddr(N, DAG); + Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG) + : getDynamicTLSAddr(N, DAG); break; } @@ -8500,6 +8641,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal, Vec, DAG.getUNDEF(VT), VL); } + case Intrinsic::riscv_vfmv_s_f: + return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); // EGS * EEW >= 128 bits case Intrinsic::riscv_vaesdf_vv: case Intrinsic::riscv_vaesdf_vs: @@ -12762,14 +12906,14 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D SDValue N0 = N.getOperand(0); if (N0.getOpcode() != ISD::ZERO_EXTEND && N0.getOpcode() != RISCVISD::VZEXT_VL) - return false;; + return false; if (!N0->hasOneUse()) - return false;; + return false; APInt ShAmt; SDValue N1 = N.getOperand(1); if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) - return false;; + return false; SDValue Src = N0.getOperand(0); EVT SrcVT = Src.getValueType(); @@ -12863,9 +13007,9 @@ struct CombineResult; /// Helper class for folding sign/zero extensions. /// In particular, this class is used for the following combines: -/// add_vl -> vwadd(u) | vwadd(u)_w -/// sub_vl -> vwsub(u) | vwsub(u)_w -/// mul_vl -> vwmul(u) | vwmul_su +/// add | add_vl -> vwadd(u) | vwadd(u)_w +/// sub | sub_vl -> vwsub(u) | vwsub(u)_w +/// mul | mul_vl -> vwmul(u) | vwmul_su /// /// An object of this class represents an operand of the operation we want to /// combine. 
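The widening folds listed in the helper's comment above rest on the usual extension identities. A scalar C++ sketch of those identities (not the DAG combine itself) shows why operating on extended operands at twice the width matches what the widening instructions compute from the narrow elements:

```cpp
#include <cassert>
#include <cstdint>

// Scalar illustration of the identities behind add|add_vl -> vwadd(u),
// sub|sub_vl -> vwsub(u) and mul|mul_vl -> vwmul(u)/vwmul_su.
int main() {
  int8_t a = -100, b = 57;
  uint8_t ua = 200, ub = 57;

  // sext(a) + sext(b) at 16 bits: what a vwadd.vv lane would produce.
  assert((int16_t)a + (int16_t)b == -43);
  // zext(ua) + zext(ub) at 16 bits: what a vwaddu.vv lane would produce.
  assert((uint16_t)ua + (uint16_t)ub == 257);
  // sext(a) * zext(ub) at 16 bits: what a vwmulsu.vv lane would produce.
  assert((int16_t)a * (int16_t)(uint16_t)ub == -5700);
  return 0;
}
```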
@@ -12910,6 +13054,8 @@ struct NodeExtensionHelper { /// E.g., for zext(a), this would return a. SDValue getSource() const { switch (OrigOperand.getOpcode()) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return OrigOperand.getOperand(0); @@ -12926,7 +13072,8 @@ struct NodeExtensionHelper { /// Get or create a value that can feed \p Root with the given extension \p /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. /// \see ::getSource(). - SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, + SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, std::optional<bool> SExt) const { if (!SExt.has_value()) return OrigOperand; @@ -12941,8 +13088,10 @@ struct NodeExtensionHelper { // If we need an extension, we should be changing the type. SDLoc DL(Root); - auto [Mask, VL] = getMaskAndVL(Root); + auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); switch (OrigOperand.getOpcode()) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); @@ -12982,12 +13131,15 @@ struct NodeExtensionHelper { /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { + case ISD::ADD: case RISCVISD::ADD_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; + case ISD::MUL: case RISCVISD::MUL_VL: return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; + case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13000,7 +13152,8 @@ struct NodeExtensionHelper { /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> /// newOpcode(a, b). static unsigned getSUOpcode(unsigned Opcode) { - assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); + assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && + "SU is only supported for MUL"); return RISCVISD::VWMULSU_VL; } @@ -13008,8 +13161,10 @@ struct NodeExtensionHelper { /// newOpcode(a, b). static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { + case ISD::ADD: case RISCVISD::ADD_VL: return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; + case ISD::SUB: case RISCVISD::SUB_VL: return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; default: @@ -13019,19 +13174,49 @@ struct NodeExtensionHelper { using CombineToTry = std::function<std::optional<CombineResult>( SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, - const NodeExtensionHelper & /*RHS*/)>; + const NodeExtensionHelper & /*RHS*/, SelectionDAG &, + const RISCVSubtarget &)>; /// Check if this node needs to be fully folded or extended for all users. bool needToPromoteOtherUsers() const { return EnforceOneUse; } /// Helper method to set the various fields of this struct based on the /// type of \p Root. 
- void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { + void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { SupportsZExt = false; SupportsSExt = false; EnforceOneUse = true; CheckMask = true; - switch (OrigOperand.getOpcode()) { + unsigned Opc = OrigOperand.getOpcode(); + switch (Opc) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: { + MVT VT = OrigOperand.getSimpleValueType(); + if (!VT.isVector()) + break; + + SDValue NarrowElt = OrigOperand.getOperand(0); + MVT NarrowVT = NarrowElt.getSimpleValueType(); + + unsigned ScalarBits = VT.getScalarSizeInBits(); + unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits(); + + // Ensure the narrowing element type is legal + if (!Subtarget.getTargetLowering()->isTypeLegal(NarrowElt.getValueType())) + break; + + // Ensure the extension's semantic is equivalent to rvv vzext or vsext. + if (ScalarBits != NarrowScalarBits * 2) + break; + + SupportsZExt = Opc == ISD::ZERO_EXTEND; + SupportsSExt = Opc == ISD::SIGN_EXTEND; + + SDLoc DL(Root); + std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); + break; + } case RISCVISD::VZEXT_VL: SupportsZExt = true; Mask = OrigOperand.getOperand(1); @@ -13087,8 +13272,16 @@ struct NodeExtensionHelper { } /// Check if \p Root supports any extension folding combines. - static bool isSupportedRoot(const SDNode *Root) { + static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(Root->getValueType(0))) + return false; + return Root->getValueType(0).isScalableVector(); + } case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: @@ -13103,9 +13296,10 @@ struct NodeExtensionHelper { } /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). - NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { - assert(isSupportedRoot(Root) && "Trying to build an helper with an " - "unsupported root"); + NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " + "unsupported root"); assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); OrigOperand = Root->getOperand(OperandIdx); @@ -13121,7 +13315,7 @@ struct NodeExtensionHelper { SupportsZExt = Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; SupportsSExt = !SupportsZExt; - std::tie(Mask, VL) = getMaskAndVL(Root); + std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); CheckMask = true; // There's no existing extension here, so we don't have to worry about // making sure it gets removed. @@ -13130,7 +13324,7 @@ struct NodeExtensionHelper { } [[fallthrough]]; default: - fillUpExtensionSupport(Root, DAG); + fillUpExtensionSupport(Root, DAG, Subtarget); break; } } @@ -13146,14 +13340,27 @@ struct NodeExtensionHelper { } /// Helper function to get the Mask and VL from \p Root. 
- static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { - assert(isSupportedRoot(Root) && "Unexpected root"); - return std::make_pair(Root->getOperand(3), Root->getOperand(4)); + static std::pair<SDValue, SDValue> + getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(isSupportedRoot(Root, DAG) && "Unexpected root"); + switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: { + SDLoc DL(Root); + MVT VT = Root->getSimpleValueType(0); + return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); + } + default: + return std::make_pair(Root->getOperand(3), Root->getOperand(4)); + } } /// Check if the Mask and VL of this operand are compatible with \p Root. - bool areVLAndMaskCompatible(const SDNode *Root) const { - auto [Mask, VL] = getMaskAndVL(Root); + bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) const { + auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); return isMaskCompatible(Mask) && isVLCompatible(VL); } @@ -13161,11 +13368,14 @@ struct NodeExtensionHelper { /// foldings that are supported by this class. static bool isCommutative(const SDNode *N) { switch (N->getOpcode()) { + case ISD::ADD: + case ISD::MUL: case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return true; + case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13210,14 +13420,25 @@ struct CombineResult { /// Return a value that uses TargetOpcode and that can be used to replace /// Root. /// The actual replacement is *not* done in that method. - SDValue materialize(SelectionDAG &DAG) const { + SDValue materialize(SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) const { SDValue Mask, VL, Merge; - std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root); - Merge = Root->getOperand(2); + std::tie(Mask, VL) = + NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); + switch (Root->getOpcode()) { + default: + Merge = Root->getOperand(2); + break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + Merge = DAG.getUNDEF(Root->getValueType(0)); + break; + } return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), - LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS), - RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge, - Mask, VL); + LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), + RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), + Merge, Mask, VL); } }; @@ -13234,15 +13455,16 @@ struct CombineResult { static std::optional<CombineResult> canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS, bool AllowSExt, - bool AllowZExt) { + bool AllowZExt, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); - if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) + if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || + !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/false), - Root, LHS, /*SExtLHS=*/false, RHS, - /*SExtRHS=*/false); + Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/true), @@ -13259,9 
+13481,10 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/true); + /*AllowZExt=*/true, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) @@ -13270,8 +13493,9 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { - if (!RHS.areVLAndMaskCompatible(Root)) + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar @@ -13295,9 +13519,10 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/false); + /*AllowZExt=*/false, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) @@ -13306,9 +13531,10 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, - /*AllowZExt=*/true); + /*AllowZExt=*/true, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) @@ -13317,10 +13543,13 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. 
static std::optional<CombineResult> canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!LHS.SupportsSExt || !RHS.SupportsZExt) return std::nullopt; - if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) + if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || + !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); @@ -13330,6 +13559,8 @@ SmallVector<NodeExtensionHelper::CombineToTry> NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { SmallVector<CombineToTry> Strategies; switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: case RISCVISD::ADD_VL: case RISCVISD::SUB_VL: // add|sub -> vwadd(u)|vwsub(u) @@ -13337,6 +13568,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { // add|sub -> vwadd(u)_w|vwsub(u)_w Strategies.push_back(canFoldToVW_W); break; + case ISD::MUL: case RISCVISD::MUL_VL: // mul -> vwmul(u) Strategies.push_back(canFoldToVWWithSameExtension); @@ -13367,12 +13599,14 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { /// mul_vl -> vwmul(u) | vwmul_su /// vwadd_w(u) -> vwadd(u) /// vwub_w(u) -> vwadd(u) -static SDValue -combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { +static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; - assert(NodeExtensionHelper::isSupportedRoot(N) && - "Shouldn't have called this method"); + if (!NodeExtensionHelper::isSupportedRoot(N, DAG)) + return SDValue(); + SmallVector<SDNode *> Worklist; SmallSet<SDNode *, 8> Inserted; Worklist.push_back(N); @@ -13381,11 +13615,11 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { while (!Worklist.empty()) { SDNode *Root = Worklist.pop_back_val(); - if (!NodeExtensionHelper::isSupportedRoot(Root)) + if (!NodeExtensionHelper::isSupportedRoot(Root, DAG)) return SDValue(); - NodeExtensionHelper LHS(N, 0, DAG); - NodeExtensionHelper RHS(N, 1, DAG); + NodeExtensionHelper LHS(N, 0, DAG, Subtarget); + NodeExtensionHelper RHS(N, 1, DAG, Subtarget); auto AppendUsersIfNeeded = [&Worklist, &Inserted](const NodeExtensionHelper &Op) { if (Op.needToPromoteOtherUsers()) { @@ -13412,7 +13646,8 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (NodeExtensionHelper::CombineToTry FoldingStrategy : FoldingStrategies) { - std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); + std::optional<CombineResult> Res = + FoldingStrategy(N, LHS, RHS, DAG, Subtarget); if (Res) { Matched = true; CombinesToApply.push_back(*Res); @@ -13441,7 +13676,7 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; ValuesToReplace.reserve(CombinesToApply.size()); for (CombineResult Res : CombinesToApply) { - SDValue NewValue = Res.materialize(DAG); + SDValue NewValue = Res.materialize(DAG, Subtarget); if (!InputRootReplacement) { assert(Res.Root == N && "First element is expected to be the current node"); @@ -14713,13 +14948,20 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - 
assert(N->getOpcode() == RISCVISD::ADD_VL); + + assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); + + if (N->getValueType(0).isFixedLengthVector()) + return SDValue(); + SDValue Addend = N->getOperand(0); SDValue MulOp = N->getOperand(1); - SDValue AddMergeOp = N->getOperand(2); - if (!AddMergeOp.isUndef()) - return SDValue(); + if (N->getOpcode() == RISCVISD::ADD_VL) { + SDValue AddMergeOp = N->getOperand(2); + if (!AddMergeOp.isUndef()) + return SDValue(); + } auto IsVWMulOpc = [](unsigned Opc) { switch (Opc) { @@ -14743,8 +14985,16 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, if (!MulMergeOp.isUndef()) return SDValue(); - SDValue AddMask = N->getOperand(3); - SDValue AddVL = N->getOperand(4); + auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (N->getOpcode() == ISD::ADD) { + SDLoc DL(N); + return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, + Subtarget); + } + return std::make_pair(N->getOperand(3), N->getOperand(4)); + }(N, DAG, Subtarget); + SDValue MulMask = MulOp.getOperand(3); SDValue MulVL = MulOp.getOperand(4); @@ -15010,10 +15260,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::AND, DL, VT, NewFMV, DAG.getConstant(~SignBit, DL, VT)); } - case ISD::ADD: + case ISD::ADD: { + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; + if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) + return V; return performADDCombine(N, DAG, Subtarget); - case ISD::SUB: + } + case ISD::SUB: { + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; return performSUBCombine(N, DAG, Subtarget); + } case ISD::AND: return performANDCombine(N, DCI, Subtarget); case ISD::OR: @@ -15021,6 +15279,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return performXORCombine(N, DAG, Subtarget); case ISD::MUL: + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; return performMULCombine(N, DAG); case ISD::FADD: case ISD::UMAX: @@ -15497,7 +15757,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } case RISCVISD::ADD_VL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) return V; return combineToVWMACC(N, DAG, Subtarget); case RISCVISD::SUB_VL: @@ -15506,7 +15766,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: case RISCVISD::MUL_VL: - return combineBinOp_VLToVWBinOp_VL(N, DCI); + return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); case RISCVISD::VFMADD_VL: case RISCVISD::VFNMADD_VL: case RISCVISD::VFMSUB_VL: @@ -15729,6 +15989,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } + case RISCVISD::VMV_X_S: { + SDValue Vec = N->getOperand(0); + MVT VecVT = N->getOperand(0).getSimpleValueType(); + const MVT M1VT = getLMUL1VT(VecVT); + if (M1VT.bitsLT(VecVT)) { + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, + DAG.getVectorIdxConstant(0, DL)); + return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec); + } + break; + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: { @@ -17047,12 +17318,39 @@ static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, RISCV::V20M4}; static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; -ArrayRef<MCPhysReg> RISCV::getArgGPRs() { - static const MCPhysReg ArgGPRs[] = 
{RISCV::X10, RISCV::X11, RISCV::X12, - RISCV::X13, RISCV::X14, RISCV::X15, - RISCV::X16, RISCV::X17}; +ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) { + // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except + // the ILP32E ABI. + static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X16, RISCV::X17}; + // The GPRs used for passing arguments in the ILP32E/ILP64E ABI. + static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15}; + + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(ArgEGPRs); - return ArrayRef(ArgGPRs); + return ArrayRef(ArgIGPRs); +} + +static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { + // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used + // for save-restore libcall, so we don't use them. + static const MCPhysReg FastCCIGPRs[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, + RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, + RISCV::X29, RISCV::X30, RISCV::X31}; + + // The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E. + static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X7}; + + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(FastCCEGPRs); + + return ArrayRef(FastCCIGPRs); } // Pass a 2*XLEN argument that has been split into two XLEN values through @@ -17060,17 +17358,23 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs() { static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, - ISD::ArgFlagsTy ArgFlags2) { + ISD::ArgFlagsTy ArgFlags2, bool EABI) { unsigned XLenInBytes = XLen / 8; - ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(); + const RISCVSubtarget &STI = + State.getMachineFunction().getSubtarget<RISCVSubtarget>(); + ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI()); + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, VA1.getLocVT(), CCValAssign::Full)); } else { // Both halves must be passed on the stack, with proper alignment. - Align StackAlign = - std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); + // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte + // alignment. This behavior may be changed when RV32E/ILP32E is ratified. 
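A small sketch of the stack-alignment rule stated in the TODO above, with hypothetical helper and parameter names: a 2*XLEN argument split onto the stack keeps 4-byte alignment on ILP32E (matching GCC), and is otherwise raised to the argument's natural alignment.

```cpp
#include <algorithm>
#include <cstdio>

// Hypothetical restatement of the ILP32E stack-alignment decision for a
// split 2*XLEN argument; not the commit's code.
static unsigned stackAlignFor2XLen(unsigned XLen, bool EABI,
                                   unsigned OrigAlign) {
  unsigned AlignBytes = XLen / 8;                 // one XLEN slot, in bytes
  if (!EABI || XLen != 32)
    AlignBytes = std::max(AlignBytes, OrigAlign); // honour natural alignment
  return AlignBytes;
}

int main() {
  std::printf("ILP32:  %u-byte slot alignment\n", stackAlignFor2XLen(32, false, 8));
  std::printf("ILP32E: %u-byte slot alignment\n", stackAlignFor2XLen(32, true, 8));
  return 0;
}
```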
+ Align StackAlign(XLenInBytes); + if (!EABI || XLen != 32) + StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign()); State.addLoc( CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), State.AllocateStack(XLenInBytes, StackAlign), @@ -17151,7 +17455,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, default: llvm_unreachable("Unexpected ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_LP64: + case RISCVABI::ABI_LP64E: break; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: @@ -17183,7 +17489,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, LocInfo = CCValAssign::BCvt; } - ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(); + ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI); // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte @@ -17192,9 +17498,13 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // legalisation or not. The argument will not be passed by registers if the // original type is larger than 2*XLEN, so the register alignment rule does // not apply. + // TODO: To be compatible with GCC's behaviors, we don't align registers + // currently if we are using ILP32E calling convention. This behavior may be + // changed when RV32E/ILP32E is ratified. unsigned TwoXLenInBytes = (2 * XLen) / 8; if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && - DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { + DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && + ABI != RISCVABI::ABI_ILP32E) { unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); // Skip 'odd' register if necessary. if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) @@ -17267,8 +17577,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, ISD::ArgFlagsTy AF = PendingArgFlags[0]; PendingLocs.clear(); PendingArgFlags.clear(); - return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, - ArgFlags); + return CC_RISCVAssign2XLen( + XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags, + ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); } // Allocate to a register if possible, or else a stack slot. @@ -17594,15 +17905,8 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, std::optional<unsigned> FirstMaskArgument) { - - // X5 and X6 might be used for save-restore libcall. 
- static const MCPhysReg GPRList[] = { - RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, - RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, - RISCV::X29, RISCV::X30, RISCV::X31}; - if (LocVT == MVT::i32 || LocVT == MVT::i64) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -17653,7 +17957,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -17687,7 +17991,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); } else { // Try and pass the address via a "fast" GPR. - if (unsigned GPRReg = State.AllocateReg(GPRList)) { + if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { LocInfo = CCValAssign::Indirect; LocVT = TLI.getSubtarget().getXLenVT(); State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); @@ -17788,6 +18092,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments( case CallingConv::GRAAL: break; case CallingConv::GHC: + if (Subtarget.isRVE()) + report_fatal_error("GHC calling convention is not supported on RVE!"); if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) report_fatal_error("GHC calling convention requires the (Zfinx/F) and " "(Zdinx/D) instruction set extensions"); @@ -17870,7 +18176,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); if (IsVarArg) { - ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(); + ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI()); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -18023,9 +18329,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVector<CCValAssign, 16> ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - if (CallConv == CallingConv::GHC) + if (CallConv == CallingConv::GHC) { + if (Subtarget.isRVE()) + report_fatal_error("GHC calling convention is not supported on RVE!"); ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); - else + } else analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV); |
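Finally, a standalone sketch of the variadic register-alignment rule the diff threads through CC_RISCV: a 2*XLEN variadic argument normally starts in an even ("aligned") argument register, but under ILP32E the odd register is not skipped, matching current GCC behaviour. The helper name is hypothetical and the size/alignment preconditions are omitted for brevity.

```cpp
#include <cstdio>

// Hypothetical sketch of the even-register rule for 2*XLEN varargs.
static unsigned firstRegFor2XLenVararg(unsigned FirstUnallocated, bool ILP32E) {
  unsigned RegIdx = FirstUnallocated;   // index into the a-register list
  if (!ILP32E && RegIdx % 2 == 1)
    ++RegIdx;                           // skip the odd register
  return RegIdx;
}

int main() {
  // Next free register is a1 (index 1): an i64 vararg starts at a2 on ILP32
  // but stays at a1 on ILP32E.
  std::printf("ILP32:  arg reg index %u\n", firstRegFor2XLenVararg(1, false));
  std::printf("ILP32E: arg reg index %u\n", firstRegFor2XLenVararg(1, true));
  return 0;
}
```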