author    Dimitry Andric <dim@FreeBSD.org>  2024-01-24 19:17:23 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2024-04-19 21:24:44 +0000
commit    ab50317e96e57dee5b3ff4ad3f16f205b2a3359e (patch)
tree      4b1f388eb6a07e574417aaacecd3ec4a83550718 /contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
parent    412542983a5ba62902141a8a7e155cceb9196a66 (diff)
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp  698
1 file changed, 503 insertions, 195 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cb9ffabc4123..47c6cd6e5487 100644
--- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -83,9 +83,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
- if (Subtarget.isRVE())
- report_fatal_error("Codegen not yet implemented for RVE");
-
RISCVABI::ABI ABI = Subtarget.getTargetABI();
assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");
@@ -107,6 +104,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
default:
report_fatal_error("Don't know how to lower this ABI");
case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_ILP32E:
+ case RISCVABI::ABI_LP64E:
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_ILP32D:
case RISCVABI::ABI_LP64:
@@ -688,7 +687,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND,
ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO,
ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS,
- ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE};
+ ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE,
+ ISD::EXPERIMENTAL_VP_SPLICE};
static const unsigned IntegerVecReduceOps[] = {
ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR,
@@ -928,7 +928,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL,
ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN,
ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT,
- ISD::VP_FNEARBYINT, ISD::VP_SETCC};
+ ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM,
+ ISD::VP_FMAXIMUM};
// Sets common operation actions on RVV floating-point vector types.
const auto SetCommonVFPActions = [&](MVT VT) {
@@ -1374,8 +1375,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setPrefLoopAlignment(Subtarget.getPrefLoopAlignment());
setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN,
- ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
- ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
+ ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL,
+ ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT});
if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
@@ -2593,11 +2594,12 @@ static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL,
static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL,
SelectionDAG &DAG, const RISCVSubtarget &Subtarget) {
- // If we know the exact VLEN, our VL is exactly equal to VLMAX, and
- // we can't encode the AVL as an immediate, use the VLMAX encoding.
+ // If we know the exact VLEN, and our VL is exactly equal to VLMAX,
+ // canonicalize the representation. InsertVSETVLI will pick the immediate
+ // encoding later if profitable.
const auto [MinVLMAX, MaxVLMAX] =
RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
- if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31)
+ if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX)
return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT());
return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT());
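The check above compares NumElts against VLMAX, which is fully determined once the exact VLEN is known: VLMAX = (VLEN / SEW) * LMUL. A minimal standalone sketch of that arithmetic follows (illustrative only, not code from this commit; the helper name vlmax and the VLEN=128 example values are assumptions).

// vlmax() is a hypothetical helper, not an LLVM API: it evaluates
// VLMAX = (VLEN / SEW) * LMUL for a known VLEN, i.e. the value selected by
// the X0 ("VLMAX") AVL encoding of vsetvli.
#include <cstdio>

static unsigned vlmax(unsigned VLenBits, unsigned SewBits, unsigned LmulNum,
                      unsigned LmulDen) {
  return VLenBits / SewBits * LmulNum / LmulDen;
}

int main() {
  // Example values for VLEN = 128.
  std::printf("e32, m2 : VLMAX = %u\n", vlmax(128, 32, 2, 1)); // 8
  std::printf("e64, m1 : VLMAX = %u\n", vlmax(128, 64, 1, 1)); // 2
  std::printf("e8,  mf2: VLMAX = %u\n", vlmax(128, 8, 1, 2));  // 8
  return 0;
}

When NumElts equals that value, the AVL can be canonicalized to the X0 register form, and the InsertVSETVLI pass is free to pick an immediate encoding later if that turns out cheaper.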
@@ -4040,19 +4042,23 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
isNullConstant(Scalar.getOperand(1))) {
SDValue ExtractedVal = Scalar.getOperand(0);
- MVT ExtractedVT = ExtractedVal.getSimpleValueType();
- MVT ExtractedContainerVT = ExtractedVT;
- if (ExtractedContainerVT.isFixedLengthVector()) {
- ExtractedContainerVT = getContainerForFixedLengthVector(
- DAG, ExtractedContainerVT, Subtarget);
- ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal,
- DAG, Subtarget);
- }
- if (ExtractedContainerVT.bitsLE(VT))
- return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal,
+ // The element types must be the same.
+ if (ExtractedVal.getValueType().getVectorElementType() ==
+ VT.getVectorElementType()) {
+ MVT ExtractedVT = ExtractedVal.getSimpleValueType();
+ MVT ExtractedContainerVT = ExtractedVT;
+ if (ExtractedContainerVT.isFixedLengthVector()) {
+ ExtractedContainerVT = getContainerForFixedLengthVector(
+ DAG, ExtractedContainerVT, Subtarget);
+ ExtractedVal = convertToScalableVector(ExtractedContainerVT,
+ ExtractedVal, DAG, Subtarget);
+ }
+ if (ExtractedContainerVT.bitsLE(VT))
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru,
+ ExtractedVal, DAG.getConstant(0, DL, XLenVT));
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
DAG.getConstant(0, DL, XLenVT));
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal,
- DAG.getConstant(0, DL, XLenVT));
+ }
}
@@ -4646,6 +4652,85 @@ static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
return DAG.getBitcast(VT, Rotate);
}
+// If compiling with an exactly known VLEN, see if we can split a
+// shuffle on m2 or larger into a small number of m1 sized shuffles
+// which write each destination register exactly once.
+static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDLoc DL(SVN);
+ MVT VT = SVN->getSimpleValueType(0);
+ SDValue V1 = SVN->getOperand(0);
+ SDValue V2 = SVN->getOperand(1);
+ ArrayRef<int> Mask = SVN->getMask();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // If we don't know exact data layout, not much we can do. If this
+ // is already m1 or smaller, no point in splitting further.
+ const unsigned MinVLen = Subtarget.getRealMinVLen();
+ const unsigned MaxVLen = Subtarget.getRealMaxVLen();
+ if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen)
+ return SDValue();
+
+ MVT ElemVT = VT.getVectorElementType();
+ unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits();
+ unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
+
+ SmallVector<std::pair<int, SmallVector<int>>>
+ OutMasks(VRegsPerSrc, {-1, {}});
+
+ // Check if our mask can be done as a 1-to-1 mapping from source
+ // to destination registers in the group without needing to
+ // write each destination more than once.
+ for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
+ int DstVecIdx = DstIdx / ElemsPerVReg;
+ int DstSubIdx = DstIdx % ElemsPerVReg;
+ int SrcIdx = Mask[DstIdx];
+ if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
+ continue;
+ int SrcVecIdx = SrcIdx / ElemsPerVReg;
+ int SrcSubIdx = SrcIdx % ElemsPerVReg;
+ if (OutMasks[DstVecIdx].first == -1)
+ OutMasks[DstVecIdx].first = SrcVecIdx;
+ if (OutMasks[DstVecIdx].first != SrcVecIdx)
+ // Note: This case could easily be handled by keeping track of a chain
+ // of source values and generating two element shuffles below. This is
+ // less an implementation question, and more a profitability one.
+ return SDValue();
+
+ OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
+ OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
+ }
+
+ EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
+ MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
+ assert(M1VT == getLMUL1VT(M1VT));
+ unsigned NumOpElts = M1VT.getVectorMinNumElements();
+ SDValue Vec = DAG.getUNDEF(ContainerVT);
+ // The following semantically builds up a fixed length concat_vector
+ // of the component shuffle_vectors. We eagerly lower to scalable here
+ // to avoid DAG combining it back to a large shuffle_vector again.
+ V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
+ V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+ for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
+ auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
+ if (SrcVecIdx == -1)
+ continue;
+ unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
+ SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
+ SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec,
+ DAG.getVectorIdxConstant(ExtractIdx, DL));
+ SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget);
+ SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask);
+ SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget);
+ unsigned InsertIdx = DstVecIdx * NumOpElts;
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec,
+ DAG.getVectorIdxConstant(InsertIdx, DL));
+ }
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+}
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
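The bookkeeping in lowerShuffleViaVRegSplitting above can be exercised in isolation. The following standalone C++ sketch (not code from this commit; the VLEN=128 / v8i32 two-source mask is a made-up example) reproduces the per-destination-register check: each m1 destination register must be fed by exactly one m1 source register, where source register indices at or above VRegsPerSrc refer to the second shuffle operand.

// A standalone sketch of the per-register splitting check, assuming
// VLEN = 128 and a v8i32 two-source shuffle, i.e. 4 elements per vector
// register and 2 registers per source operand.
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  const unsigned ElemsPerVReg = 4;
  const std::vector<int> Mask = {4, 5, 6, 7, 12, 13, 14, 15};
  const unsigned NumElts = Mask.size();
  const unsigned VRegsPerSrc = NumElts / ElemsPerVReg;

  // first = source m1 register feeding this destination (-1 = none yet),
  // second = the sub-mask applied within that register.
  std::vector<std::pair<int, std::vector<int>>> OutMasks(
      VRegsPerSrc, {-1, std::vector<int>(ElemsPerVReg, -1)});

  for (unsigned DstIdx = 0; DstIdx < NumElts; ++DstIdx) {
    int SrcIdx = Mask[DstIdx];
    if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
      continue; // undef lane
    unsigned DstVecIdx = DstIdx / ElemsPerVReg;
    int SrcVecIdx = SrcIdx / ElemsPerVReg;
    if (OutMasks[DstVecIdx].first == -1)
      OutMasks[DstVecIdx].first = SrcVecIdx;
    if (OutMasks[DstVecIdx].first != SrcVecIdx) {
      std::puts("not splittable: one destination needs two source registers");
      return 0;
    }
    OutMasks[DstVecIdx].second[DstIdx % ElemsPerVReg] = SrcIdx % ElemsPerVReg;
  }

  for (unsigned I = 0; I < VRegsPerSrc; ++I) {
    std::printf("dst vreg %u <- src vreg %d, sub-mask:", I, OutMasks[I].first);
    for (int M : OutMasks[I].second)
      std::printf(" %d", M);
    std::printf("\n");
  }
  return 0;
}

For this example mask, destination register 0 is fed entirely by V1's second register and destination register 1 by V2's second register, so the whole m2 shuffle reduces to two whole-register m1 copies.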
@@ -4753,6 +4838,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
}
}
+ // For exact VLEN m2 or greater, try to split to m1 operations if we
+ // can split cleanly.
+ if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget))
+ return V;
+
ArrayRef<int> Mask = SVN->getMask();
if (SDValue V =
@@ -4846,54 +4936,28 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
- SmallVector<SDValue> MaskVals;
- // As a backup, shuffles can be lowered via a vrgather instruction, possibly
- // merged with a second vrgather.
- SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
-
// By default we preserve the original operand order, and use a mask to
// select LHS as true and RHS as false. However, since RVV vector selects may
// feature splats but only on the LHS, we may choose to invert our mask and
// instead select between RHS and LHS.
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
- bool InvertMask = IsSelect == SwapOps;
-
- // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
- // half.
- DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
- // Now construct the mask that will be used by the vselect or blended
- // vrgather operation. For vrgathers, construct the appropriate indices into
- // each vector.
- for (int MaskIndex : Mask) {
- bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
- MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
- if (!IsSelect) {
- bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
- GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
- ? DAG.getConstant(MaskIndex, DL, XLenVT)
- : DAG.getUNDEF(XLenVT));
- GatherIndicesRHS.push_back(
- IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
- : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
- if (IsLHSOrUndefIndex && MaskIndex >= 0)
- ++LHSIndexCounts[MaskIndex];
- if (!IsLHSOrUndefIndex)
- ++RHSIndexCounts[MaskIndex - NumElts];
+ if (IsSelect) {
+ // Now construct the mask that will be used by the vselect operation.
+ SmallVector<SDValue> MaskVals;
+ for (int MaskIndex : Mask) {
+ bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps;
+ MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
}
- }
- if (SwapOps) {
- std::swap(V1, V2);
- std::swap(GatherIndicesLHS, GatherIndicesRHS);
- }
-
- assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
- MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
- SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
+ if (SwapOps)
+ std::swap(V1, V2);
- if (IsSelect)
+ assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
+ }
// We might be able to express the shuffle as a bitrotate. But even if we
// don't have Zvkb and have to expand, the expanded sequence of approx. 2
@@ -4909,6 +4973,43 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+ // As a backup, shuffles can be lowered via a vrgather instruction, possibly
+ // merged with a second vrgather.
+ SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
+
+ // Keep a track of which non-undef indices are used by each LHS/RHS shuffle
+ // half.
+ DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts;
+
+ SmallVector<SDValue> MaskVals;
+
+ // Now construct the mask that will be used by the blended vrgather operation.
+ // Construct the appropriate indices into each vector.
+ for (int MaskIndex : Mask) {
+ bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps;
+ MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
+ bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts;
+ GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0
+ ? DAG.getConstant(MaskIndex, DL, XLenVT)
+ : DAG.getUNDEF(XLenVT));
+ GatherIndicesRHS.push_back(
+ IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT)
+ : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT));
+ if (IsLHSOrUndefIndex && MaskIndex >= 0)
+ ++LHSIndexCounts[MaskIndex];
+ if (!IsLHSOrUndefIndex)
+ ++RHSIndexCounts[MaskIndex - NumElts];
+ }
+
+ if (SwapOps) {
+ std::swap(V1, V2);
+ std::swap(GatherIndicesLHS, GatherIndicesRHS);
+ }
+
+ assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
+
unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL;
unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL;
MVT IndexVT = VT.changeTypeToInteger();
@@ -4932,56 +5033,60 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
MVT IndexContainerVT =
ContainerVT.changeVectorElementType(IndexVT.getScalarType());
- SDValue Gather;
- // TODO: This doesn't trigger for i64 vectors on RV32, since there we
- // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
- if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
- Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
- Subtarget);
- } else {
+ // Base case for the recursion just below - handle the worst case
+ // single source permutation. Note that all the splat variants
+ // are handled above.
+ if (V2.isUndef()) {
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
- // If only one index is used, we can use a "splat" vrgather.
- // TODO: We can splat the most-common index and fix-up any stragglers, if
- // that's beneficial.
- if (LHSIndexCounts.size() == 1) {
- int SplatIndex = LHSIndexCounts.begin()->getFirst();
- Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
- DAG.getConstant(SplatIndex, DL, XLenVT),
- DAG.getUNDEF(ContainerVT), TrueMask, VL);
- } else {
- SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
- LHSIndices =
- convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
-
- Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
- DAG.getUNDEF(ContainerVT), TrueMask, VL);
+ SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
+ LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG,
+ Subtarget);
+ SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
+ DAG.getUNDEF(ContainerVT), TrueMask, VL);
+ return convertFromScalableVector(VT, Gather, DAG, Subtarget);
+ }
+
+ // Translate the gather index we computed above (and possibly swapped)
+ // back to a shuffle mask. This step should disappear once we complete
+ // the migration to recursive design.
+ SmallVector<int> ShuffleMaskLHS;
+ ShuffleMaskLHS.reserve(GatherIndicesLHS.size());
+ for (SDValue GatherIndex : GatherIndicesLHS) {
+ if (GatherIndex.isUndef()) {
+ ShuffleMaskLHS.push_back(-1);
+ continue;
}
+ auto *IdxC = cast<ConstantSDNode>(GatherIndex);
+ ShuffleMaskLHS.push_back(IdxC->getZExtValue());
}
- // If a second vector operand is used by this shuffle, blend it in with an
- // additional vrgather.
- if (!V2.isUndef()) {
- V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+ // Recursively invoke lowering for the LHS as if there were no RHS.
+ // This allows us to leverage all of our single source permute tricks.
+ SDValue Gather =
+ DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS);
+ Gather = convertToScalableVector(ContainerVT, Gather, DAG, Subtarget);
- MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
- SelectMask =
- convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
+ // Blend in second vector source with an additional vrgather.
+ V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
- // If only one index is used, we can use a "splat" vrgather.
- // TODO: We can splat the most-common index and fix-up any stragglers, if
- // that's beneficial.
- if (RHSIndexCounts.size() == 1) {
- int SplatIndex = RHSIndexCounts.begin()->getFirst();
- Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
- DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
- SelectMask, VL);
- } else {
- SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
- RHSIndices =
- convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
- Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
- SelectMask, VL);
- }
+ MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
+ SelectMask =
+ convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
+
+ // If only one index is used, we can use a "splat" vrgather.
+ // TODO: We can splat the most-common index and fix-up any stragglers, if
+ // that's beneficial.
+ if (RHSIndexCounts.size() == 1) {
+ int SplatIndex = RHSIndexCounts.begin()->getFirst();
+ Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
+ DAG.getConstant(SplatIndex, DL, XLenVT), Gather,
+ SelectMask, VL);
+ } else {
+ SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
+ RHSIndices =
+ convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
+ Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather,
+ SelectMask, VL);
}
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
@@ -5401,7 +5506,16 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget);
}
- auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ SDValue Mask, VL;
+ if (Op->isVPOpcode()) {
+ Mask = Op.getOperand(2);
+ if (VT.isFixedLengthVector())
+ Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG,
+ Subtarget);
+ VL = Op.getOperand(3);
+ } else {
+ std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ }
SDValue NewY = Y;
if (!XIsNeverNan) {
@@ -5422,7 +5536,9 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG,
}
unsigned Opc =
- Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL;
+ Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM
+ ? RISCVISD::VFMAX_VL
+ : RISCVISD::VFMIN_VL;
SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY,
DAG.getUNDEF(ContainerVT), Mask, VL);
if (VT.isFixedLengthVector())
@@ -6651,6 +6767,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
!Subtarget.hasVInstructionsF16()))
return SplitVPOp(Op, DAG);
return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget);
+ case ISD::VP_FMAXIMUM:
+ case ISD::VP_FMINIMUM:
+ if (Op.getValueType() == MVT::nxv32f16 &&
+ (Subtarget.hasVInstructionsF16Minimal() &&
+ !Subtarget.hasVInstructionsF16()))
+ return SplitVPOp(Op, DAG);
+ return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget);
case ISD::EXPERIMENTAL_VP_SPLICE:
return lowerVPSpliceExperimental(Op, DAG);
case ISD::EXPERIMENTAL_VP_REVERSE:
@@ -6859,6 +6982,23 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
return LowerCallTo(CLI).first;
}
+SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+ EVT Ty = getPointerTy(DAG.getDataLayout());
+ const GlobalValue *GV = N->getGlobal();
+
+ // Use a PC-relative addressing mode to access the global dynamic GOT address.
+ // This generates the pattern (PseudoLA_TLSDESC sym), which expands to
+ //
+ // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol)
+ // lw tY, tX, %tlsdesc_load_lo(label) // R_RISCV_TLSDESC_LOAD_LO12_I(label)
+ // addi a0, tX, %tlsdesc_add_lo(label) // R_RISCV_TLSDESC_ADD_LO12_I(label)
+ // jalr t0, tY // R_RISCV_TLSDESC_CALL(label)
+ SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
+ return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0);
+}
+
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
@@ -6883,7 +7023,8 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
break;
case TLSModel::LocalDynamic:
case TLSModel::GeneralDynamic:
- Addr = getDynamicTLSAddr(N, DAG);
+ Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG)
+ : getDynamicTLSAddr(N, DAG);
break;
}
@@ -8500,6 +8641,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal,
Vec, DAG.getUNDEF(VT), VL);
}
+ case Intrinsic::riscv_vfmv_s_f:
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(),
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
// EGS * EEW >= 128 bits
case Intrinsic::riscv_vaesdf_vv:
case Intrinsic::riscv_vaesdf_vs:
@@ -12762,14 +12906,14 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D
SDValue N0 = N.getOperand(0);
if (N0.getOpcode() != ISD::ZERO_EXTEND &&
N0.getOpcode() != RISCVISD::VZEXT_VL)
- return false;;
+ return false;
if (!N0->hasOneUse())
- return false;;
+ return false;
APInt ShAmt;
SDValue N1 = N.getOperand(1);
if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
- return false;;
+ return false;
SDValue Src = N0.getOperand(0);
EVT SrcVT = Src.getValueType();
@@ -12863,9 +13007,9 @@ struct CombineResult;
/// Helper class for folding sign/zero extensions.
/// In particular, this class is used for the following combines:
-/// add_vl -> vwadd(u) | vwadd(u)_w
-/// sub_vl -> vwsub(u) | vwsub(u)_w
-/// mul_vl -> vwmul(u) | vwmul_su
+/// add | add_vl -> vwadd(u) | vwadd(u)_w
+/// sub | sub_vl -> vwsub(u) | vwsub(u)_w
+/// mul | mul_vl -> vwmul(u) | vwmul_su
///
/// An object of this class represents an operand of the operation we want to
/// combine.
@@ -12910,6 +13054,8 @@ struct NodeExtensionHelper {
/// E.g., for zext(a), this would return a.
SDValue getSource() const {
switch (OrigOperand.getOpcode()) {
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
case RISCVISD::VSEXT_VL:
case RISCVISD::VZEXT_VL:
return OrigOperand.getOperand(0);
@@ -12926,7 +13072,8 @@ struct NodeExtensionHelper {
/// Get or create a value that can feed \p Root with the given extension \p
/// SExt. If \p SExt is std::nullopt, this returns the source of this operand.
/// \see ::getSource().
- SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG,
+ SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
std::optional<bool> SExt) const {
if (!SExt.has_value())
return OrigOperand;
@@ -12941,8 +13088,10 @@ struct NodeExtensionHelper {
// If we need an extension, we should be changing the type.
SDLoc DL(Root);
- auto [Mask, VL] = getMaskAndVL(Root);
+ auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
switch (OrigOperand.getOpcode()) {
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
case RISCVISD::VSEXT_VL:
case RISCVISD::VZEXT_VL:
return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL);
@@ -12982,12 +13131,15 @@ struct NodeExtensionHelper {
/// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()).
static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) {
switch (Opcode) {
+ case ISD::ADD:
case RISCVISD::ADD_VL:
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL;
+ case ISD::MUL:
case RISCVISD::MUL_VL:
return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL;
+ case ISD::SUB:
case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
@@ -13000,7 +13152,8 @@ struct NodeExtensionHelper {
/// Get the opcode to materialize \p Opcode(sext(a), zext(b)) ->
/// newOpcode(a, b).
static unsigned getSUOpcode(unsigned Opcode) {
- assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL");
+ assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) &&
+ "SU is only supported for MUL");
return RISCVISD::VWMULSU_VL;
}
@@ -13008,8 +13161,10 @@ struct NodeExtensionHelper {
/// newOpcode(a, b).
static unsigned getWOpcode(unsigned Opcode, bool IsSExt) {
switch (Opcode) {
+ case ISD::ADD:
case RISCVISD::ADD_VL:
return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL;
+ case ISD::SUB:
case RISCVISD::SUB_VL:
return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL;
default:
@@ -13019,19 +13174,49 @@ struct NodeExtensionHelper {
using CombineToTry = std::function<std::optional<CombineResult>(
SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/,
- const NodeExtensionHelper & /*RHS*/)>;
+ const NodeExtensionHelper & /*RHS*/, SelectionDAG &,
+ const RISCVSubtarget &)>;
/// Check if this node needs to be fully folded or extended for all users.
bool needToPromoteOtherUsers() const { return EnforceOneUse; }
/// Helper method to set the various fields of this struct based on the
/// type of \p Root.
- void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) {
+ void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
SupportsZExt = false;
SupportsSExt = false;
EnforceOneUse = true;
CheckMask = true;
- switch (OrigOperand.getOpcode()) {
+ unsigned Opc = OrigOperand.getOpcode();
+ switch (Opc) {
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND: {
+ MVT VT = OrigOperand.getSimpleValueType();
+ if (!VT.isVector())
+ break;
+
+ SDValue NarrowElt = OrigOperand.getOperand(0);
+ MVT NarrowVT = NarrowElt.getSimpleValueType();
+
+ unsigned ScalarBits = VT.getScalarSizeInBits();
+ unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits();
+
+ // Ensure the narrowing element type is legal
+ if (!Subtarget.getTargetLowering()->isTypeLegal(NarrowElt.getValueType()))
+ break;
+
+ // Ensure the extension's semantic is equivalent to rvv vzext or vsext.
+ if (ScalarBits != NarrowScalarBits * 2)
+ break;
+
+ SupportsZExt = Opc == ISD::ZERO_EXTEND;
+ SupportsSExt = Opc == ISD::SIGN_EXTEND;
+
+ SDLoc DL(Root);
+ std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
+ break;
+ }
case RISCVISD::VZEXT_VL:
SupportsZExt = true;
Mask = OrigOperand.getOperand(1);
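The ScalarBits check in the new ISD::ZERO_EXTEND/ISD::SIGN_EXTEND case above only accepts a doubling extension, since the vwadd/vwsub/vwmul family widens by exactly one step. A minimal standalone sketch of that predicate (illustrative only, not LLVM code; the helper name and the widths used in main are assumptions):

#include <cstdio>

// Hypothetical helper: a plain sign/zero extend can only be absorbed into a
// widening RVV op when the wide element is exactly twice the narrow element.
static bool foldsIntoWideningOp(unsigned WideBits, unsigned NarrowBits) {
  return WideBits == NarrowBits * 2;
}

int main() {
  std::printf("i8->i16 : %s\n", foldsIntoWideningOp(16, 8) ? "fold" : "keep extend");
  std::printf("i8->i32 : %s\n", foldsIntoWideningOp(32, 8) ? "fold" : "keep extend");
  std::printf("i16->i32: %s\n", foldsIntoWideningOp(32, 16) ? "fold" : "keep extend");
  return 0;
}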
@@ -13087,8 +13272,16 @@ struct NodeExtensionHelper {
}
/// Check if \p Root supports any extension folding combines.
- static bool isSupportedRoot(const SDNode *Root) {
+ static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) {
switch (Root->getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(Root->getValueType(0)))
+ return false;
+ return Root->getValueType(0).isScalableVector();
+ }
case RISCVISD::ADD_VL:
case RISCVISD::MUL_VL:
case RISCVISD::VWADD_W_VL:
@@ -13103,9 +13296,10 @@ struct NodeExtensionHelper {
}
/// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx).
- NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) {
- assert(isSupportedRoot(Root) && "Trying to build a helper with an "
- "unsupported root");
+ NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(isSupportedRoot(Root, DAG) && "Trying to build a helper with an "
+ "unsupported root");
assert(OperandIdx < 2 && "Requesting something else than LHS or RHS");
OrigOperand = Root->getOperand(OperandIdx);
@@ -13121,7 +13315,7 @@ struct NodeExtensionHelper {
SupportsZExt =
Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL;
SupportsSExt = !SupportsZExt;
- std::tie(Mask, VL) = getMaskAndVL(Root);
+ std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget);
CheckMask = true;
// There's no existing extension here, so we don't have to worry about
// making sure it gets removed.
@@ -13130,7 +13324,7 @@ struct NodeExtensionHelper {
}
[[fallthrough]];
default:
- fillUpExtensionSupport(Root, DAG);
+ fillUpExtensionSupport(Root, DAG, Subtarget);
break;
}
}
@@ -13146,14 +13340,27 @@ struct NodeExtensionHelper {
}
/// Helper function to get the Mask and VL from \p Root.
- static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) {
- assert(isSupportedRoot(Root) && "Unexpected root");
- return std::make_pair(Root->getOperand(3), Root->getOperand(4));
+ static std::pair<SDValue, SDValue>
+ getMaskAndVL(const SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(isSupportedRoot(Root, DAG) && "Unexpected root");
+ switch (Root->getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL: {
+ SDLoc DL(Root);
+ MVT VT = Root->getSimpleValueType(0);
+ return getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
+ }
+ default:
+ return std::make_pair(Root->getOperand(3), Root->getOperand(4));
+ }
}
/// Check if the Mask and VL of this operand are compatible with \p Root.
- bool areVLAndMaskCompatible(const SDNode *Root) const {
- auto [Mask, VL] = getMaskAndVL(Root);
+ bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) const {
+ auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget);
return isMaskCompatible(Mask) && isVLCompatible(VL);
}
@@ -13161,11 +13368,14 @@ struct NodeExtensionHelper {
/// foldings that are supported by this class.
static bool isCommutative(const SDNode *N) {
switch (N->getOpcode()) {
+ case ISD::ADD:
+ case ISD::MUL:
case RISCVISD::ADD_VL:
case RISCVISD::MUL_VL:
case RISCVISD::VWADD_W_VL:
case RISCVISD::VWADDU_W_VL:
return true;
+ case ISD::SUB:
case RISCVISD::SUB_VL:
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
@@ -13210,14 +13420,25 @@ struct CombineResult {
/// Return a value that uses TargetOpcode and that can be used to replace
/// Root.
/// The actual replacement is *not* done in that method.
- SDValue materialize(SelectionDAG &DAG) const {
+ SDValue materialize(SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) const {
SDValue Mask, VL, Merge;
- std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root);
- Merge = Root->getOperand(2);
+ std::tie(Mask, VL) =
+ NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget);
+ switch (Root->getOpcode()) {
+ default:
+ Merge = Root->getOperand(2);
+ break;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ Merge = DAG.getUNDEF(Root->getValueType(0));
+ break;
+ }
return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0),
- LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS),
- RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge,
- Mask, VL);
+ LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS),
+ RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS),
+ Merge, Mask, VL);
}
};
@@ -13234,15 +13455,16 @@ struct CombineResult {
static std::optional<CombineResult>
canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
const NodeExtensionHelper &RHS, bool AllowSExt,
- bool AllowZExt) {
+ bool AllowZExt, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
assert((AllowSExt || AllowZExt) && "Forgot to set what you want?");
- if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
+ if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
+ !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
return std::nullopt;
if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt)
return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
Root->getOpcode(), /*IsSExt=*/false),
- Root, LHS, /*SExtLHS=*/false, RHS,
- /*SExtRHS=*/false);
+ Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false);
if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt)
return CombineResult(NodeExtensionHelper::getSameExtensionOpcode(
Root->getOpcode(), /*IsSExt=*/true),
@@ -13259,9 +13481,10 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
- /*AllowZExt=*/true);
+ /*AllowZExt=*/true, DAG, Subtarget);
}
/// Check if \p Root follows a pattern Root(LHS, ext(RHS))
@@ -13270,8 +13493,9 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
- if (!RHS.areVLAndMaskCompatible(Root))
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
return std::nullopt;
// FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
@@ -13295,9 +13519,10 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true,
- /*AllowZExt=*/false);
+ /*AllowZExt=*/false, DAG, Subtarget);
}
/// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS))
@@ -13306,9 +13531,10 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false,
- /*AllowZExt=*/true);
+ /*AllowZExt=*/true, DAG, Subtarget);
}
/// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS))
@@ -13317,10 +13543,13 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS,
/// can be used to apply the pattern.
static std::optional<CombineResult>
canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS,
- const NodeExtensionHelper &RHS) {
+ const NodeExtensionHelper &RHS, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+
if (!LHS.SupportsSExt || !RHS.SupportsZExt)
return std::nullopt;
- if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root))
+ if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) ||
+ !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget))
return std::nullopt;
return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()),
Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false);
@@ -13330,6 +13559,8 @@ SmallVector<NodeExtensionHelper::CombineToTry>
NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
SmallVector<CombineToTry> Strategies;
switch (Root->getOpcode()) {
+ case ISD::ADD:
+ case ISD::SUB:
case RISCVISD::ADD_VL:
case RISCVISD::SUB_VL:
// add|sub -> vwadd(u)|vwsub(u)
@@ -13337,6 +13568,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
// add|sub -> vwadd(u)_w|vwsub(u)_w
Strategies.push_back(canFoldToVW_W);
break;
+ case ISD::MUL:
case RISCVISD::MUL_VL:
// mul -> vwmul(u)
Strategies.push_back(canFoldToVWWithSameExtension);
@@ -13367,12 +13599,14 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) {
/// mul_vl -> vwmul(u) | vwmul_su
/// vwadd_w(u) -> vwadd(u)
/// vwsub_w(u) -> vwsub(u)
-static SDValue
-combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
- assert(NodeExtensionHelper::isSupportedRoot(N) &&
- "Shouldn't have called this method");
+ if (!NodeExtensionHelper::isSupportedRoot(N, DAG))
+ return SDValue();
+
SmallVector<SDNode *> Worklist;
SmallSet<SDNode *, 8> Inserted;
Worklist.push_back(N);
@@ -13381,11 +13615,11 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
while (!Worklist.empty()) {
SDNode *Root = Worklist.pop_back_val();
- if (!NodeExtensionHelper::isSupportedRoot(Root))
+ if (!NodeExtensionHelper::isSupportedRoot(Root, DAG))
return SDValue();
- NodeExtensionHelper LHS(N, 0, DAG);
- NodeExtensionHelper RHS(N, 1, DAG);
+ NodeExtensionHelper LHS(N, 0, DAG, Subtarget);
+ NodeExtensionHelper RHS(N, 1, DAG, Subtarget);
auto AppendUsersIfNeeded = [&Worklist,
&Inserted](const NodeExtensionHelper &Op) {
if (Op.needToPromoteOtherUsers()) {
@@ -13412,7 +13646,8 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
for (NodeExtensionHelper::CombineToTry FoldingStrategy :
FoldingStrategies) {
- std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS);
+ std::optional<CombineResult> Res =
+ FoldingStrategy(N, LHS, RHS, DAG, Subtarget);
if (Res) {
Matched = true;
CombinesToApply.push_back(*Res);
@@ -13441,7 +13676,7 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace;
ValuesToReplace.reserve(CombinesToApply.size());
for (CombineResult Res : CombinesToApply) {
- SDValue NewValue = Res.materialize(DAG);
+ SDValue NewValue = Res.materialize(DAG, Subtarget);
if (!InputRootReplacement) {
assert(Res.Root == N &&
"First element is expected to be the current node");
@@ -14713,13 +14948,20 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG,
static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
- assert(N->getOpcode() == RISCVISD::ADD_VL);
+
+ assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD);
+
+ if (N->getValueType(0).isFixedLengthVector())
+ return SDValue();
+
SDValue Addend = N->getOperand(0);
SDValue MulOp = N->getOperand(1);
- SDValue AddMergeOp = N->getOperand(2);
- if (!AddMergeOp.isUndef())
- return SDValue();
+ if (N->getOpcode() == RISCVISD::ADD_VL) {
+ SDValue AddMergeOp = N->getOperand(2);
+ if (!AddMergeOp.isUndef())
+ return SDValue();
+ }
auto IsVWMulOpc = [](unsigned Opc) {
switch (Opc) {
@@ -14743,8 +14985,16 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG,
if (!MulMergeOp.isUndef())
return SDValue();
- SDValue AddMask = N->getOperand(3);
- SDValue AddVL = N->getOperand(4);
+ auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (N->getOpcode() == ISD::ADD) {
+ SDLoc DL(N);
+ return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG,
+ Subtarget);
+ }
+ return std::make_pair(N->getOperand(3), N->getOperand(4));
+ }(N, DAG, Subtarget);
+
SDValue MulMask = MulOp.getOperand(3);
SDValue MulVL = MulOp.getOperand(4);
@@ -15010,10 +15260,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getNode(ISD::AND, DL, VT, NewFMV,
DAG.getConstant(~SignBit, DL, VT));
}
- case ISD::ADD:
+ case ISD::ADD: {
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ return V;
+ if (SDValue V = combineToVWMACC(N, DAG, Subtarget))
+ return V;
return performADDCombine(N, DAG, Subtarget);
- case ISD::SUB:
+ }
+ case ISD::SUB: {
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ return V;
return performSUBCombine(N, DAG, Subtarget);
+ }
case ISD::AND:
return performANDCombine(N, DCI, Subtarget);
case ISD::OR:
@@ -15021,6 +15279,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::XOR:
return performXORCombine(N, DAG, Subtarget);
case ISD::MUL:
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
+ return V;
return performMULCombine(N, DAG);
case ISD::FADD:
case ISD::UMAX:
@@ -15497,7 +15757,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::ADD_VL:
- if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI))
+ if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget))
return V;
return combineToVWMACC(N, DAG, Subtarget);
case RISCVISD::SUB_VL:
@@ -15506,7 +15766,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::VWSUB_W_VL:
case RISCVISD::VWSUBU_W_VL:
case RISCVISD::MUL_VL:
- return combineBinOp_VLToVWBinOp_VL(N, DCI);
+ return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget);
case RISCVISD::VFMADD_VL:
case RISCVISD::VFNMADD_VL:
case RISCVISD::VFMSUB_VL:
@@ -15729,6 +15989,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case RISCVISD::VMV_X_S: {
+ SDValue Vec = N->getOperand(0);
+ MVT VecVT = N->getOperand(0).getSimpleValueType();
+ const MVT M1VT = getLMUL1VT(VecVT);
+ if (M1VT.bitsLT(VecVT)) {
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec,
+ DAG.getVectorIdxConstant(0, DL));
+ return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec);
+ }
+ break;
+ }
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_WO_CHAIN: {
@@ -17047,12 +17318,39 @@ static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
RISCV::V20M4};
static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
-ArrayRef<MCPhysReg> RISCV::getArgGPRs() {
- static const MCPhysReg ArgGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
- RISCV::X13, RISCV::X14, RISCV::X15,
- RISCV::X16, RISCV::X17};
+ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
+ // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except
+ // the ILP32E/LP64E ABIs.
+ static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
+ RISCV::X13, RISCV::X14, RISCV::X15,
+ RISCV::X16, RISCV::X17};
+ // The GPRs used for passing arguments in the ILP32E/LP64E ABIs.
+ static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
+ RISCV::X13, RISCV::X14, RISCV::X15};
+
+ if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+ return ArrayRef(ArgEGPRs);
- return ArrayRef(ArgGPRs);
+ return ArrayRef(ArgIGPRs);
+}
+
+static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
+ // The GPRs used for passing arguments in the FastCC. X5 and X6 might be
+ // used for the save-restore libcall, so we don't use them.
+ static const MCPhysReg FastCCIGPRs[] = {
+ RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
+ RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
+ RISCV::X29, RISCV::X30, RISCV::X31};
+
+ // The GPRs used for passing arguments in the FastCC when using ILP32E/LP64E.
+ static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
+ RISCV::X13, RISCV::X14, RISCV::X15,
+ RISCV::X7};
+
+ if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
+ return ArrayRef(FastCCEGPRs);
+
+ return ArrayRef(FastCCIGPRs);
}
// Pass a 2*XLEN argument that has been split into two XLEN values through
@@ -17060,17 +17358,23 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs() {
static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
MVT ValVT2, MVT LocVT2,
- ISD::ArgFlagsTy ArgFlags2) {
+ ISD::ArgFlagsTy ArgFlags2, bool EABI) {
unsigned XLenInBytes = XLen / 8;
- ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
+ const RISCVSubtarget &STI =
+ State.getMachineFunction().getSubtarget<RISCVSubtarget>();
+ ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI());
+
if (Register Reg = State.AllocateReg(ArgGPRs)) {
// At least one half can be passed via register.
State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
VA1.getLocVT(), CCValAssign::Full));
} else {
// Both halves must be passed on the stack, with proper alignment.
- Align StackAlign =
- std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
+ // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte
+ // alignment. This behavior may be changed when RV32E/ILP32E is ratified.
+ Align StackAlign(XLenInBytes);
+ if (!EABI || XLen != 32)
+ StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign());
State.addLoc(
CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
State.AllocateStack(XLenInBytes, StackAlign),
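The alignment TODO above can be summarized with a small standalone sketch (not LLVM code; the helper name and the i64-on-RV32 example are assumptions): when both halves of a split 2*XLEN argument land on the stack, ILP32E keeps them at XLEN (4-byte) alignment for GCC compatibility, while the other ABIs still honor the argument's original alignment.

#include <algorithm>
#include <cstdio>

// Hypothetical helper mirroring the rule in CC_RISCVAssign2XLen: start from
// XLenInBytes and only raise to the original alignment outside ILP32E.
static unsigned stackAlignFor2XLen(unsigned XLen, unsigned OrigAlign,
                                   bool EABI) {
  unsigned AlignBytes = XLen / 8; // XLenInBytes
  if (!EABI || XLen != 32)
    AlignBytes = std::max(AlignBytes, OrigAlign);
  return AlignBytes;
}

int main() {
  // An i64 on RV32 normally requests 8-byte alignment for its stack slot.
  std::printf("ILP32  split i64: align %u\n",
              stackAlignFor2XLen(32, 8, /*EABI=*/false)); // 8
  std::printf("ILP32E split i64: align %u\n",
              stackAlignFor2XLen(32, 8, /*EABI=*/true));  // 4
  return 0;
}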
@@ -17151,7 +17455,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
default:
llvm_unreachable("Unexpected ABI");
case RISCVABI::ABI_ILP32:
+ case RISCVABI::ABI_ILP32E:
case RISCVABI::ABI_LP64:
+ case RISCVABI::ABI_LP64E:
break;
case RISCVABI::ABI_ILP32F:
case RISCVABI::ABI_LP64F:
@@ -17183,7 +17489,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
LocInfo = CCValAssign::BCvt;
}
- ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs();
+ ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI);
// If this is a variadic argument, the RISC-V calling convention requires
// that it is assigned an 'even' or 'aligned' register if it has 8-byte
@@ -17192,9 +17498,13 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
// legalisation or not. The argument will not be passed by registers if the
// original type is larger than 2*XLEN, so the register alignment rule does
// not apply.
+ // TODO: To be compatible with GCC's behaviors, we don't align registers
+ // currently if we are using ILP32E calling convention. This behavior may be
+ // changed when RV32E/ILP32E is ratified.
unsigned TwoXLenInBytes = (2 * XLen) / 8;
if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
- DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
+ DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes &&
+ ABI != RISCVABI::ABI_ILP32E) {
unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
// Skip 'odd' register if necessary.
if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1)
@@ -17267,8 +17577,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
ISD::ArgFlagsTy AF = PendingArgFlags[0];
PendingLocs.clear();
PendingArgFlags.clear();
- return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
- ArgFlags);
+ return CC_RISCVAssign2XLen(
+ XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags,
+ ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E);
}
// Allocate to a register if possible, or else a stack slot.
@@ -17594,15 +17905,8 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
bool IsFixed, bool IsRet, Type *OrigTy,
const RISCVTargetLowering &TLI,
std::optional<unsigned> FirstMaskArgument) {
-
- // X5 and X6 might be used for save-restore libcall.
- static const MCPhysReg GPRList[] = {
- RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
- RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
- RISCV::X29, RISCV::X30, RISCV::X31};
-
if (LocVT == MVT::i32 || LocVT == MVT::i64) {
- if (unsigned Reg = State.AllocateReg(GPRList)) {
+ if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -17653,7 +17957,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
(LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
(LocVT == MVT::f64 && Subtarget.is64Bit() &&
Subtarget.hasStdExtZdinx())) {
- if (unsigned Reg = State.AllocateReg(GPRList)) {
+ if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
}
@@ -17687,7 +17991,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
} else {
// Try and pass the address via a "fast" GPR.
- if (unsigned GPRReg = State.AllocateReg(GPRList)) {
+ if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
LocInfo = CCValAssign::Indirect;
LocVT = TLI.getSubtarget().getXLenVT();
State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));
@@ -17788,6 +18092,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
case CallingConv::GRAAL:
break;
case CallingConv::GHC:
+ if (Subtarget.isRVE())
+ report_fatal_error("GHC calling convention is not supported on RVE!");
if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx())
report_fatal_error("GHC calling convention requires the (Zfinx/F) and "
"(Zdinx/D) instruction set extensions");
@@ -17870,7 +18176,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall();
if (IsVarArg) {
- ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs();
+ ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI());
unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
const TargetRegisterClass *RC = &RISCV::GPRRegClass;
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -18023,9 +18329,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
SmallVector<CCValAssign, 16> ArgLocs;
CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
- if (CallConv == CallingConv::GHC)
+ if (CallConv == CallingConv::GHC) {
+ if (Subtarget.isRVE())
+ report_fatal_error("GHC calling convention is not supported on RVE!");
ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC);
- else
+ } else
analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI,
CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC
: RISCV::CC_RISCV);