author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-24 19:17:23 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-19 21:24:44 +0000 |
commit | ab50317e96e57dee5b3ff4ad3f16f205b2a3359e (patch) | |
tree | 4b1f388eb6a07e574417aaacecd3ec4a83550718 /contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp | |
parent | 412542983a5ba62902141a8a7e155cceb9196a66 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 698 |
1 file changed, 503 insertions, 195 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index cb9ffabc4123..47c6cd6e5487 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -83,9 +83,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, const RISCVSubtarget &STI) : TargetLowering(TM), Subtarget(STI) { - if (Subtarget.isRVE()) - report_fatal_error("Codegen not yet implemented for RVE"); - RISCVABI::ABI ABI = Subtarget.getTargetABI(); assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI"); @@ -107,6 +104,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, default: report_fatal_error("Don't know how to lower this ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: + case RISCVABI::ABI_LP64E: case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_ILP32D: case RISCVABI::ABI_LP64: @@ -688,7 +687,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, ISD::VP_FNEARBYINT, ISD::VP_IS_FPCLASS, - ISD::EXPERIMENTAL_VP_REVERSE, ISD::EXPERIMENTAL_VP_SPLICE}; + ISD::VP_FMINIMUM, ISD::VP_FMAXIMUM, ISD::EXPERIMENTAL_VP_REVERSE, + ISD::EXPERIMENTAL_VP_SPLICE}; static const unsigned IntegerVecReduceOps[] = { ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, @@ -928,7 +928,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_FMINNUM, ISD::VP_FMAXNUM, ISD::VP_FCEIL, ISD::VP_FFLOOR, ISD::VP_FROUND, ISD::VP_FROUNDEVEN, ISD::VP_FCOPYSIGN, ISD::VP_FROUNDTOZERO, ISD::VP_FRINT, - ISD::VP_FNEARBYINT, ISD::VP_SETCC}; + ISD::VP_FNEARBYINT, ISD::VP_SETCC, ISD::VP_FMINIMUM, + ISD::VP_FMAXIMUM}; // Sets common operation actions on RVV floating-point vector types. const auto SetCommonVFPActions = [&](MVT VT) { @@ -1374,8 +1375,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setPrefLoopAlignment(Subtarget.getPrefLoopAlignment()); setTargetDAGCombine({ISD::INTRINSIC_VOID, ISD::INTRINSIC_W_CHAIN, - ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND, - ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); + ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::MUL, + ISD::AND, ISD::OR, ISD::XOR, ISD::SETCC, ISD::SELECT}); if (Subtarget.is64Bit()) setTargetDAGCombine(ISD::SRA); @@ -2593,11 +2594,12 @@ static SDValue getAllOnesMask(MVT VecVT, SDValue VL, const SDLoc &DL, static SDValue getVLOp(uint64_t NumElts, MVT ContainerVT, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - // If we know the exact VLEN, our VL is exactly equal to VLMAX, and - // we can't encode the AVL as an immediate, use the VLMAX encoding. + // If we know the exact VLEN, and our VL is exactly equal to VLMAX, + // canonicalize the representation. InsertVSETVLI will pick the immediate + // encoding later if profitable. 
const auto [MinVLMAX, MaxVLMAX] = RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget); - if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX && NumElts > 31) + if (MinVLMAX == MaxVLMAX && NumElts == MinVLMAX) return DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()); return DAG.getConstant(NumElts, DL, Subtarget.getXLenVT()); @@ -4040,19 +4042,23 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, if (Scalar.getOpcode() == ISD::EXTRACT_VECTOR_ELT && isNullConstant(Scalar.getOperand(1))) { SDValue ExtractedVal = Scalar.getOperand(0); - MVT ExtractedVT = ExtractedVal.getSimpleValueType(); - MVT ExtractedContainerVT = ExtractedVT; - if (ExtractedContainerVT.isFixedLengthVector()) { - ExtractedContainerVT = getContainerForFixedLengthVector( - DAG, ExtractedContainerVT, Subtarget); - ExtractedVal = convertToScalableVector(ExtractedContainerVT, ExtractedVal, - DAG, Subtarget); - } - if (ExtractedContainerVT.bitsLE(VT)) - return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, ExtractedVal, + // The element types must be the same. + if (ExtractedVal.getValueType().getVectorElementType() == + VT.getVectorElementType()) { + MVT ExtractedVT = ExtractedVal.getSimpleValueType(); + MVT ExtractedContainerVT = ExtractedVT; + if (ExtractedContainerVT.isFixedLengthVector()) { + ExtractedContainerVT = getContainerForFixedLengthVector( + DAG, ExtractedContainerVT, Subtarget); + ExtractedVal = convertToScalableVector(ExtractedContainerVT, + ExtractedVal, DAG, Subtarget); + } + if (ExtractedContainerVT.bitsLE(VT)) + return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Passthru, + ExtractedVal, DAG.getConstant(0, DL, XLenVT)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, DAG.getConstant(0, DL, XLenVT)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, ExtractedVal, - DAG.getConstant(0, DL, XLenVT)); + } } @@ -4646,6 +4652,85 @@ static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN, return DAG.getBitcast(VT, Rotate); } +// If compiling with an exactly known VLEN, see if we can split a +// shuffle on m2 or larger into a small number of m1 sized shuffles +// which write each destination registers exactly once. +static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDLoc DL(SVN); + MVT VT = SVN->getSimpleValueType(0); + SDValue V1 = SVN->getOperand(0); + SDValue V2 = SVN->getOperand(1); + ArrayRef<int> Mask = SVN->getMask(); + unsigned NumElts = VT.getVectorNumElements(); + + // If we don't know exact data layout, not much we can do. If this + // is already m1 or smaller, no point in splitting further. + const unsigned MinVLen = Subtarget.getRealMinVLen(); + const unsigned MaxVLen = Subtarget.getRealMaxVLen(); + if (MinVLen != MaxVLen || VT.getSizeInBits().getFixedValue() <= MinVLen) + return SDValue(); + + MVT ElemVT = VT.getVectorElementType(); + unsigned ElemsPerVReg = MinVLen / ElemVT.getFixedSizeInBits(); + unsigned VRegsPerSrc = NumElts / ElemsPerVReg; + + SmallVector<std::pair<int, SmallVector<int>>> + OutMasks(VRegsPerSrc, {-1, {}}); + + // Check if our mask can be done as a 1-to-1 mapping from source + // to destination registers in the group without needing to + // write each destination more than once. 
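The comment above describes the 1-to-1 register-mapping check performed by lowerShuffleViaVRegSplitting. A minimal standalone C++ sketch of that per-register index math follows, using an invented v8i32 mask and a single source operand for brevity; it is an illustration, not the code from the commit:

```cpp
#include <cstdio>
#include <vector>

// Standalone illustration (not the commit's code): with an exactly known
// 128-bit VLEN, a v8i32 shuffle spans two vector registers of four elements
// each. Each destination register must be fed by exactly one source register
// for the split to apply; the per-register submask is the source index
// modulo the register size.
int main() {
  const unsigned ElemsPerVReg = 4;            // 128-bit VLEN / 32-bit elements
  const std::vector<int> Mask = {4, 5, 6, 7,  // dst reg 0 reads src reg 1
                                 0, 1, 2, 3}; // dst reg 1 reads src reg 0
  const unsigned NumVRegs =
      static_cast<unsigned>(Mask.size()) / ElemsPerVReg;

  for (unsigned Dst = 0; Dst < NumVRegs; ++Dst) {
    int SrcVReg = -1;
    bool OneToOne = true;
    std::vector<int> SubMask(ElemsPerVReg, -1);
    for (unsigned I = 0; I < ElemsPerVReg; ++I) {
      int SrcIdx = Mask[Dst * ElemsPerVReg + I];
      if (SrcIdx < 0)
        continue;                             // undef lane: no constraint
      int Reg = SrcIdx / (int)ElemsPerVReg;
      if (SrcVReg == -1)
        SrcVReg = Reg;
      if (Reg != SrcVReg) {                   // needs two sources: give up
        OneToOne = false;
        break;
      }
      SubMask[I] = SrcIdx % (int)ElemsPerVReg; // index within that register
    }
    if (!OneToOne) {
      std::printf("dst vreg %u: not a 1-to-1 register mapping\n", Dst);
      continue;
    }
    std::printf("dst vreg %u <- src vreg %d, submask:", Dst, SrcVReg);
    for (int M : SubMask)
      std::printf(" %d", M);
    std::printf("\n");
  }
  return 0;
}
```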
+ for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) { + int DstVecIdx = DstIdx / ElemsPerVReg; + int DstSubIdx = DstIdx % ElemsPerVReg; + int SrcIdx = Mask[DstIdx]; + if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts) + continue; + int SrcVecIdx = SrcIdx / ElemsPerVReg; + int SrcSubIdx = SrcIdx % ElemsPerVReg; + if (OutMasks[DstVecIdx].first == -1) + OutMasks[DstVecIdx].first = SrcVecIdx; + if (OutMasks[DstVecIdx].first != SrcVecIdx) + // Note: This case could easily be handled by keeping track of a chain + // of source values and generating two element shuffles below. This is + // less an implementation question, and more a profitability one. + return SDValue(); + + OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1); + OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx; + } + + EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget); + MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg); + MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget); + assert(M1VT == getLMUL1VT(M1VT)); + unsigned NumOpElts = M1VT.getVectorMinNumElements(); + SDValue Vec = DAG.getUNDEF(ContainerVT); + // The following semantically builds up a fixed length concat_vector + // of the component shuffle_vectors. We eagerly lower to scalable here + // to avoid DAG combining it back to a large shuffle_vector again. + V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); + V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); + for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) { + auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx]; + if (SrcVecIdx == -1) + continue; + unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts; + SDValue SrcVec = (unsigned)SrcVecIdx > VRegsPerSrc ? V2 : V1; + SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, SrcVec, + DAG.getVectorIdxConstant(ExtractIdx, DL)); + SubVec = convertFromScalableVector(OneRegVT, SubVec, DAG, Subtarget); + SubVec = DAG.getVectorShuffle(OneRegVT, DL, SubVec, SubVec, SrcSubMask); + SubVec = convertToScalableVector(M1VT, SubVec, DAG, Subtarget); + unsigned InsertIdx = DstVecIdx * NumOpElts; + Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT, Vec, SubVec, + DAG.getVectorIdxConstant(InsertIdx, DL)); + } + return convertFromScalableVector(VT, Vec, DAG, Subtarget); +} + static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { SDValue V1 = Op.getOperand(0); @@ -4753,6 +4838,11 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, } } + // For exact VLEN m2 or greater, try to split to m1 operations if we + // can split cleanly. + if (SDValue V = lowerShuffleViaVRegSplitting(SVN, DAG, Subtarget)) + return V; + ArrayRef<int> Mask = SVN->getMask(); if (SDValue V = @@ -4846,54 +4936,28 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, assert(!V1.isUndef() && "Unexpected shuffle canonicalization"); - SmallVector<SDValue> MaskVals; - // As a backup, shuffles can be lowered via a vrgather instruction, possibly - // merged with a second vrgather. - SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; - // By default we preserve the original operand order, and use a mask to // select LHS as true and RHS as false. However, since RVV vector selects may // feature splats but only on the LHS, we may choose to invert our mask and // instead select between RHS and LHS. 
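A small standalone sketch of the select-style case described in the comment above, with an invented four-element mask: when the splat operand is V2, the operands are swapped and the lane mask is inverted, exactly as the code that follows does.

```cpp
#include <cstdio>
#include <vector>

// Standalone illustration of the select-style shuffle: every lane keeps its
// position and merely chooses between V1 and V2. If V2 is the splat operand,
// the operands are swapped and the lane mask inverted so the splat ends up
// on the side the vselect handles best. Mask values are invented.
int main() {
  const int NumElts = 4;
  const std::vector<int> Mask = {0, 5, 2, 7}; // lane i from V1 (i) or V2 (i+4)
  const bool SwapOps = true;                  // pretend V2 is a splat, V1 is not

  std::printf("vselect lane mask (1 selects the first operand):");
  for (int MaskIndex : Mask) {
    bool SelectMaskVal = (MaskIndex < NumElts) ^ SwapOps;
    std::printf(" %d", SelectMaskVal ? 1 : 0);
  }
  std::printf("\n");                          // prints 0 1 0 1 with SwapOps
  return 0;
}
```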
bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1); - bool InvertMask = IsSelect == SwapOps; - - // Keep a track of which non-undef indices are used by each LHS/RHS shuffle - // half. - DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; - // Now construct the mask that will be used by the vselect or blended - // vrgather operation. For vrgathers, construct the appropriate indices into - // each vector. - for (int MaskIndex : Mask) { - bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask; - MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); - if (!IsSelect) { - bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; - GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 - ? DAG.getConstant(MaskIndex, DL, XLenVT) - : DAG.getUNDEF(XLenVT)); - GatherIndicesRHS.push_back( - IsLHSOrUndefIndex ? DAG.getUNDEF(XLenVT) - : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); - if (IsLHSOrUndefIndex && MaskIndex >= 0) - ++LHSIndexCounts[MaskIndex]; - if (!IsLHSOrUndefIndex) - ++RHSIndexCounts[MaskIndex - NumElts]; + if (IsSelect) { + // Now construct the mask that will be used by the vselect operation. + SmallVector<SDValue> MaskVals; + for (int MaskIndex : Mask) { + bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ SwapOps; + MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); } - } - if (SwapOps) { - std::swap(V1, V2); - std::swap(GatherIndicesLHS, GatherIndicesRHS); - } - - assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); - MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); - SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); + if (SwapOps) + std::swap(V1, V2); - if (IsSelect) + assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); + MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2); + } // We might be able to express the shuffle as a bitrotate. But even if we // don't have Zvkb and have to expand, the expanded sequence of approx. 2 @@ -4909,6 +4973,43 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, return SDValue(); } + // As a backup, shuffles can be lowered via a vrgather instruction, possibly + // merged with a second vrgather. + SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS; + + // Keep a track of which non-undef indices are used by each LHS/RHS shuffle + // half. + DenseMap<int, unsigned> LHSIndexCounts, RHSIndexCounts; + + SmallVector<SDValue> MaskVals; + + // Now construct the mask that will be used by the blended vrgather operation. + // Cconstruct the appropriate indices into each vector. + for (int MaskIndex : Mask) { + bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ !SwapOps; + MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT)); + bool IsLHSOrUndefIndex = MaskIndex < (int)NumElts; + GatherIndicesLHS.push_back(IsLHSOrUndefIndex && MaskIndex >= 0 + ? DAG.getConstant(MaskIndex, DL, XLenVT) + : DAG.getUNDEF(XLenVT)); + GatherIndicesRHS.push_back( + IsLHSOrUndefIndex ? 
DAG.getUNDEF(XLenVT) + : DAG.getConstant(MaskIndex - NumElts, DL, XLenVT)); + if (IsLHSOrUndefIndex && MaskIndex >= 0) + ++LHSIndexCounts[MaskIndex]; + if (!IsLHSOrUndefIndex) + ++RHSIndexCounts[MaskIndex - NumElts]; + } + + if (SwapOps) { + std::swap(V1, V2); + std::swap(GatherIndicesLHS, GatherIndicesRHS); + } + + assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle"); + MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); + SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals); + unsigned GatherVXOpc = RISCVISD::VRGATHER_VX_VL; unsigned GatherVVOpc = RISCVISD::VRGATHER_VV_VL; MVT IndexVT = VT.changeTypeToInteger(); @@ -4932,56 +5033,60 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, MVT IndexContainerVT = ContainerVT.changeVectorElementType(IndexVT.getScalarType()); - SDValue Gather; - // TODO: This doesn't trigger for i64 vectors on RV32, since there we - // encounter a bitcasted BUILD_VECTOR with low/high i32 values. - if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) { - Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG, - Subtarget); - } else { + // Base case for the recursion just below - handle the worst case + // single source permutation. Note that all the splat variants + // are handled above. + if (V2.isUndef()) { V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget); - // If only one index is used, we can use a "splat" vrgather. - // TODO: We can splat the most-common index and fix-up any stragglers, if - // that's beneficial. - if (LHSIndexCounts.size() == 1) { - int SplatIndex = LHSIndexCounts.begin()->getFirst(); - Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1, - DAG.getConstant(SplatIndex, DL, XLenVT), - DAG.getUNDEF(ContainerVT), TrueMask, VL); - } else { - SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); - LHSIndices = - convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget); - - Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, - DAG.getUNDEF(ContainerVT), TrueMask, VL); + SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS); + LHSIndices = convertToScalableVector(IndexContainerVT, LHSIndices, DAG, + Subtarget); + SDValue Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices, + DAG.getUNDEF(ContainerVT), TrueMask, VL); + return convertFromScalableVector(VT, Gather, DAG, Subtarget); + } + + // Translate the gather index we computed above (and possibly swapped) + // back to a shuffle mask. This step should disappear once we complete + // the migration to recursive design. + SmallVector<int> ShuffleMaskLHS; + ShuffleMaskLHS.reserve(GatherIndicesLHS.size()); + for (SDValue GatherIndex : GatherIndicesLHS) { + if (GatherIndex.isUndef()) { + ShuffleMaskLHS.push_back(-1); + continue; } + auto *IdxC = cast<ConstantSDNode>(GatherIndex); + ShuffleMaskLHS.push_back(IdxC->getZExtValue()); } - // If a second vector operand is used by this shuffle, blend it in with an - // additional vrgather. - if (!V2.isUndef()) { - V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); + // Recursively invoke lowering for the LHS as if there were no RHS. + // This allows us to leverage all of our single source permute tricks. 
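A standalone sketch of the decomposition described above, with an invented mask: the two-source shuffle becomes a V1-only shuffle (so the single-source permute lowerings can run on it) followed by a masked vrgather that blends in the V2 elements.

```cpp
#include <cstdio>
#include <vector>

// Standalone sketch of the two-source decomposition: V2 lanes are left
// undef in the V1-only mask, and their per-lane V2 gather indices are
// recorded for the follow-up masked vrgather.
int main() {
  const int NumElts = 4;
  const std::vector<int> Mask = {6, 1, 0, 5}; // indices >= 4 refer to V2

  std::vector<int> ShuffleMaskLHS, GatherIdxRHS;
  for (int M : Mask) {
    bool FromV1 = M >= 0 && M < NumElts;
    ShuffleMaskLHS.push_back(FromV1 ? M : -1);         // step 1: V1-only shuffle
    GatherIdxRHS.push_back(FromV1 ? -1 : M - NumElts); // step 2: blend from V2
  }

  std::printf("V1-only shuffle mask:");
  for (int M : ShuffleMaskLHS)
    std::printf(" %d", M);
  std::printf("\nV2 gather indices:  ");
  for (int M : GatherIdxRHS)
    std::printf(" %d", M);
  std::printf("\n");
  return 0;
}
```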
+ SDValue Gather = + DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), ShuffleMaskLHS); + Gather = convertToScalableVector(ContainerVT, Gather, DAG, Subtarget); - MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); - SelectMask = - convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); + // Blend in second vector source with an additional vrgather. + V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget); - // If only one index is used, we can use a "splat" vrgather. - // TODO: We can splat the most-common index and fix-up any stragglers, if - // that's beneficial. - if (RHSIndexCounts.size() == 1) { - int SplatIndex = RHSIndexCounts.begin()->getFirst(); - Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, - DAG.getConstant(SplatIndex, DL, XLenVT), Gather, - SelectMask, VL); - } else { - SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); - RHSIndices = - convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); - Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, - SelectMask, VL); - } + MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1); + SelectMask = + convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget); + + // If only one index is used, we can use a "splat" vrgather. + // TODO: We can splat the most-common index and fix-up any stragglers, if + // that's beneficial. + if (RHSIndexCounts.size() == 1) { + int SplatIndex = RHSIndexCounts.begin()->getFirst(); + Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2, + DAG.getConstant(SplatIndex, DL, XLenVT), Gather, + SelectMask, VL); + } else { + SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS); + RHSIndices = + convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget); + Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, Gather, + SelectMask, VL); } return convertFromScalableVector(VT, Gather, DAG, Subtarget); @@ -5401,7 +5506,16 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, Y = convertToScalableVector(ContainerVT, Y, DAG, Subtarget); } - auto [Mask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + SDValue Mask, VL; + if (Op->isVPOpcode()) { + Mask = Op.getOperand(2); + if (VT.isFixedLengthVector()) + Mask = convertToScalableVector(getMaskTypeFor(ContainerVT), Mask, DAG, + Subtarget); + VL = Op.getOperand(3); + } else { + std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget); + } SDValue NewY = Y; if (!XIsNeverNan) { @@ -5422,7 +5536,9 @@ static SDValue lowerFMAXIMUM_FMINIMUM(SDValue Op, SelectionDAG &DAG, } unsigned Opc = - Op.getOpcode() == ISD::FMAXIMUM ? RISCVISD::VFMAX_VL : RISCVISD::VFMIN_VL; + Op.getOpcode() == ISD::FMAXIMUM || Op->getOpcode() == ISD::VP_FMAXIMUM + ? 
RISCVISD::VFMAX_VL + : RISCVISD::VFMIN_VL; SDValue Res = DAG.getNode(Opc, DL, ContainerVT, NewX, NewY, DAG.getUNDEF(ContainerVT), Mask, VL); if (VT.isFixedLengthVector()) @@ -6651,6 +6767,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, !Subtarget.hasVInstructionsF16())) return SplitVPOp(Op, DAG); return lowerVectorFTRUNC_FCEIL_FFLOOR_FROUND(Op, DAG, Subtarget); + case ISD::VP_FMAXIMUM: + case ISD::VP_FMINIMUM: + if (Op.getValueType() == MVT::nxv32f16 && + (Subtarget.hasVInstructionsF16Minimal() && + !Subtarget.hasVInstructionsF16())) + return SplitVPOp(Op, DAG); + return lowerFMAXIMUM_FMINIMUM(Op, DAG, Subtarget); case ISD::EXPERIMENTAL_VP_SPLICE: return lowerVPSpliceExperimental(Op, DAG); case ISD::EXPERIMENTAL_VP_REVERSE: @@ -6859,6 +6982,23 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N, return LowerCallTo(CLI).first; } +SDValue RISCVTargetLowering::getTLSDescAddr(GlobalAddressSDNode *N, + SelectionDAG &DAG) const { + SDLoc DL(N); + EVT Ty = getPointerTy(DAG.getDataLayout()); + const GlobalValue *GV = N->getGlobal(); + + // Use a PC-relative addressing mode to access the global dynamic GOT address. + // This generates the pattern (PseudoLA_TLSDESC sym), which expands to + // + // auipc tX, %tlsdesc_hi(symbol) // R_RISCV_TLSDESC_HI20(symbol) + // lw tY, tX, %tlsdesc_lo_load(label) // R_RISCV_TLSDESC_LOAD_LO12_I(label) + // addi a0, tX, %tlsdesc_lo_add(label) // R_RISCV_TLSDESC_ADD_LO12_I(label) + // jalr t0, tY // R_RISCV_TLSDESC_CALL(label) + SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0); + return SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLSDESC, DL, Ty, Addr), 0); +} + SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); @@ -6883,7 +7023,8 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op, break; case TLSModel::LocalDynamic: case TLSModel::GeneralDynamic: - Addr = getDynamicTLSAddr(N, DAG); + Addr = DAG.getTarget().useTLSDESC() ? getTLSDescAddr(N, DAG) + : getDynamicTLSAddr(N, DAG); break; } @@ -8500,6 +8641,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(RISCVISD::VMERGE_VL, DL, VT, SelectCond, SplattedVal, Vec, DAG.getUNDEF(VT), VL); } + case Intrinsic::riscv_vfmv_s_f: + return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, Op.getSimpleValueType(), + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); // EGS * EEW >= 128 bits case Intrinsic::riscv_vaesdf_vv: case Intrinsic::riscv_vaesdf_vs: @@ -12762,14 +12906,14 @@ static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &D SDValue N0 = N.getOperand(0); if (N0.getOpcode() != ISD::ZERO_EXTEND && N0.getOpcode() != RISCVISD::VZEXT_VL) - return false;; + return false; if (!N0->hasOneUse()) - return false;; + return false; APInt ShAmt; SDValue N1 = N.getOperand(1); if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) - return false;; + return false; SDValue Src = N0.getOperand(0); EVT SrcVT = Src.getValueType(); @@ -12863,9 +13007,9 @@ struct CombineResult; /// Helper class for folding sign/zero extensions. /// In particular, this class is used for the following combines: -/// add_vl -> vwadd(u) | vwadd(u)_w -/// sub_vl -> vwsub(u) | vwsub(u)_w -/// mul_vl -> vwmul(u) | vwmul_su +/// add | add_vl -> vwadd(u) | vwadd(u)_w +/// sub | sub_vl -> vwsub(u) | vwsub(u)_w +/// mul | mul_vl -> vwmul(u) | vwmul_su /// /// An object of this class represents an operand of the operation we want to /// combine. 
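The widening folds listed in the helper's comment above rest on the usual extension identities. A scalar C++ sketch of those identities (not the DAG combine itself) shows why operating on extended operands at twice the width matches what the widening instructions compute from the narrow elements:

```cpp
#include <cassert>
#include <cstdint>

// Scalar illustration of the identities behind add|add_vl -> vwadd(u),
// sub|sub_vl -> vwsub(u) and mul|mul_vl -> vwmul(u)/vwmul_su.
int main() {
  int8_t a = -100, b = 57;
  uint8_t ua = 200, ub = 57;

  // sext(a) + sext(b) at 16 bits: what a vwadd.vv lane would produce.
  assert((int16_t)a + (int16_t)b == -43);
  // zext(ua) + zext(ub) at 16 bits: what a vwaddu.vv lane would produce.
  assert((uint16_t)ua + (uint16_t)ub == 257);
  // sext(a) * zext(ub) at 16 bits: what a vwmulsu.vv lane would produce.
  assert((int16_t)a * (int16_t)(uint16_t)ub == -5700);
  return 0;
}
```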
@@ -12910,6 +13054,8 @@ struct NodeExtensionHelper { /// E.g., for zext(a), this would return a. SDValue getSource() const { switch (OrigOperand.getOpcode()) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return OrigOperand.getOperand(0); @@ -12926,7 +13072,8 @@ struct NodeExtensionHelper { /// Get or create a value that can feed \p Root with the given extension \p /// SExt. If \p SExt is std::nullopt, this returns the source of this operand. /// \see ::getSource(). - SDValue getOrCreateExtendedOp(const SDNode *Root, SelectionDAG &DAG, + SDValue getOrCreateExtendedOp(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, std::optional<bool> SExt) const { if (!SExt.has_value()) return OrigOperand; @@ -12941,8 +13088,10 @@ struct NodeExtensionHelper { // If we need an extension, we should be changing the type. SDLoc DL(Root); - auto [Mask, VL] = getMaskAndVL(Root); + auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); switch (OrigOperand.getOpcode()) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: case RISCVISD::VSEXT_VL: case RISCVISD::VZEXT_VL: return DAG.getNode(ExtOpc, DL, NarrowVT, Source, Mask, VL); @@ -12982,12 +13131,15 @@ struct NodeExtensionHelper { /// \pre \p Opcode represents a supported root (\see ::isSupportedRoot()). static unsigned getSameExtensionOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { + case ISD::ADD: case RISCVISD::ADD_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return IsSExt ? RISCVISD::VWADD_VL : RISCVISD::VWADDU_VL; + case ISD::MUL: case RISCVISD::MUL_VL: return IsSExt ? RISCVISD::VWMUL_VL : RISCVISD::VWMULU_VL; + case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13000,7 +13152,8 @@ struct NodeExtensionHelper { /// Get the opcode to materialize \p Opcode(sext(a), zext(b)) -> /// newOpcode(a, b). static unsigned getSUOpcode(unsigned Opcode) { - assert(Opcode == RISCVISD::MUL_VL && "SU is only supported for MUL"); + assert((Opcode == RISCVISD::MUL_VL || Opcode == ISD::MUL) && + "SU is only supported for MUL"); return RISCVISD::VWMULSU_VL; } @@ -13008,8 +13161,10 @@ struct NodeExtensionHelper { /// newOpcode(a, b). static unsigned getWOpcode(unsigned Opcode, bool IsSExt) { switch (Opcode) { + case ISD::ADD: case RISCVISD::ADD_VL: return IsSExt ? RISCVISD::VWADD_W_VL : RISCVISD::VWADDU_W_VL; + case ISD::SUB: case RISCVISD::SUB_VL: return IsSExt ? RISCVISD::VWSUB_W_VL : RISCVISD::VWSUBU_W_VL; default: @@ -13019,19 +13174,49 @@ struct NodeExtensionHelper { using CombineToTry = std::function<std::optional<CombineResult>( SDNode * /*Root*/, const NodeExtensionHelper & /*LHS*/, - const NodeExtensionHelper & /*RHS*/)>; + const NodeExtensionHelper & /*RHS*/, SelectionDAG &, + const RISCVSubtarget &)>; /// Check if this node needs to be fully folded or extended for all users. bool needToPromoteOtherUsers() const { return EnforceOneUse; } /// Helper method to set the various fields of this struct based on the /// type of \p Root. 
- void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG) { + void fillUpExtensionSupport(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { SupportsZExt = false; SupportsSExt = false; EnforceOneUse = true; CheckMask = true; - switch (OrigOperand.getOpcode()) { + unsigned Opc = OrigOperand.getOpcode(); + switch (Opc) { + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: { + MVT VT = OrigOperand.getSimpleValueType(); + if (!VT.isVector()) + break; + + SDValue NarrowElt = OrigOperand.getOperand(0); + MVT NarrowVT = NarrowElt.getSimpleValueType(); + + unsigned ScalarBits = VT.getScalarSizeInBits(); + unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits(); + + // Ensure the narrowing element type is legal + if (!Subtarget.getTargetLowering()->isTypeLegal(NarrowElt.getValueType())) + break; + + // Ensure the extension's semantic is equivalent to rvv vzext or vsext. + if (ScalarBits != NarrowScalarBits * 2) + break; + + SupportsZExt = Opc == ISD::ZERO_EXTEND; + SupportsSExt = Opc == ISD::SIGN_EXTEND; + + SDLoc DL(Root); + std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget); + break; + } case RISCVISD::VZEXT_VL: SupportsZExt = true; Mask = OrigOperand.getOperand(1); @@ -13087,8 +13272,16 @@ struct NodeExtensionHelper { } /// Check if \p Root supports any extension folding combines. - static bool isSupportedRoot(const SDNode *Root) { + static bool isSupportedRoot(const SDNode *Root, const SelectionDAG &DAG) { switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(Root->getValueType(0))) + return false; + return Root->getValueType(0).isScalableVector(); + } case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: @@ -13103,9 +13296,10 @@ struct NodeExtensionHelper { } /// Build a NodeExtensionHelper for \p Root.getOperand(\p OperandIdx). - NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG) { - assert(isSupportedRoot(Root) && "Trying to build an helper with an " - "unsupported root"); + NodeExtensionHelper(SDNode *Root, unsigned OperandIdx, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(isSupportedRoot(Root, DAG) && "Trying to build an helper with an " + "unsupported root"); assert(OperandIdx < 2 && "Requesting something else than LHS or RHS"); OrigOperand = Root->getOperand(OperandIdx); @@ -13121,7 +13315,7 @@ struct NodeExtensionHelper { SupportsZExt = Opc == RISCVISD::VWADDU_W_VL || Opc == RISCVISD::VWSUBU_W_VL; SupportsSExt = !SupportsZExt; - std::tie(Mask, VL) = getMaskAndVL(Root); + std::tie(Mask, VL) = getMaskAndVL(Root, DAG, Subtarget); CheckMask = true; // There's no existing extension here, so we don't have to worry about // making sure it gets removed. @@ -13130,7 +13324,7 @@ struct NodeExtensionHelper { } [[fallthrough]]; default: - fillUpExtensionSupport(Root, DAG); + fillUpExtensionSupport(Root, DAG, Subtarget); break; } } @@ -13146,14 +13340,27 @@ struct NodeExtensionHelper { } /// Helper function to get the Mask and VL from \p Root. 
- static std::pair<SDValue, SDValue> getMaskAndVL(const SDNode *Root) { - assert(isSupportedRoot(Root) && "Unexpected root"); - return std::make_pair(Root->getOperand(3), Root->getOperand(4)); + static std::pair<SDValue, SDValue> + getMaskAndVL(const SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + assert(isSupportedRoot(Root, DAG) && "Unexpected root"); + switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: { + SDLoc DL(Root); + MVT VT = Root->getSimpleValueType(0); + return getDefaultScalableVLOps(VT, DL, DAG, Subtarget); + } + default: + return std::make_pair(Root->getOperand(3), Root->getOperand(4)); + } } /// Check if the Mask and VL of this operand are compatible with \p Root. - bool areVLAndMaskCompatible(const SDNode *Root) const { - auto [Mask, VL] = getMaskAndVL(Root); + bool areVLAndMaskCompatible(SDNode *Root, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) const { + auto [Mask, VL] = getMaskAndVL(Root, DAG, Subtarget); return isMaskCompatible(Mask) && isVLCompatible(VL); } @@ -13161,11 +13368,14 @@ struct NodeExtensionHelper { /// foldings that are supported by this class. static bool isCommutative(const SDNode *N) { switch (N->getOpcode()) { + case ISD::ADD: + case ISD::MUL: case RISCVISD::ADD_VL: case RISCVISD::MUL_VL: case RISCVISD::VWADD_W_VL: case RISCVISD::VWADDU_W_VL: return true; + case ISD::SUB: case RISCVISD::SUB_VL: case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: @@ -13210,14 +13420,25 @@ struct CombineResult { /// Return a value that uses TargetOpcode and that can be used to replace /// Root. /// The actual replacement is *not* done in that method. - SDValue materialize(SelectionDAG &DAG) const { + SDValue materialize(SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) const { SDValue Mask, VL, Merge; - std::tie(Mask, VL) = NodeExtensionHelper::getMaskAndVL(Root); - Merge = Root->getOperand(2); + std::tie(Mask, VL) = + NodeExtensionHelper::getMaskAndVL(Root, DAG, Subtarget); + switch (Root->getOpcode()) { + default: + Merge = Root->getOperand(2); + break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + Merge = DAG.getUNDEF(Root->getValueType(0)); + break; + } return DAG.getNode(TargetOpcode, SDLoc(Root), Root->getValueType(0), - LHS.getOrCreateExtendedOp(Root, DAG, SExtLHS), - RHS.getOrCreateExtendedOp(Root, DAG, SExtRHS), Merge, - Mask, VL); + LHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtLHS), + RHS.getOrCreateExtendedOp(Root, DAG, Subtarget, SExtRHS), + Merge, Mask, VL); } }; @@ -13234,15 +13455,16 @@ struct CombineResult { static std::optional<CombineResult> canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, const NodeExtensionHelper &RHS, bool AllowSExt, - bool AllowZExt) { + bool AllowZExt, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { assert((AllowSExt || AllowZExt) && "Forgot to set what you want?"); - if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) + if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || + !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; if (AllowZExt && LHS.SupportsZExt && RHS.SupportsZExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/false), - Root, LHS, /*SExtLHS=*/false, RHS, - /*SExtRHS=*/false); + Root, LHS, /*SExtLHS=*/false, RHS, /*SExtRHS=*/false); if (AllowSExt && LHS.SupportsSExt && RHS.SupportsSExt) return CombineResult(NodeExtensionHelper::getSameExtensionOpcode( Root->getOpcode(), /*IsSExt=*/true), @@ -13259,9 
+13481,10 @@ canFoldToVWWithSameExtensionImpl(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/true); + /*AllowZExt=*/true, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(LHS, ext(RHS)) @@ -13270,8 +13493,9 @@ canFoldToVWWithSameExtension(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { - if (!RHS.areVLAndMaskCompatible(Root)) + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar @@ -13295,9 +13519,10 @@ canFoldToVW_W(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/true, - /*AllowZExt=*/false); + /*AllowZExt=*/false, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(zext(LHS), zext(RHS)) @@ -13306,9 +13531,10 @@ canFoldToVWWithSEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. static std::optional<CombineResult> canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { return canFoldToVWWithSameExtensionImpl(Root, LHS, RHS, /*AllowSExt=*/false, - /*AllowZExt=*/true); + /*AllowZExt=*/true, DAG, Subtarget); } /// Check if \p Root follows a pattern Root(sext(LHS), zext(RHS)) @@ -13317,10 +13543,13 @@ canFoldToVWWithZEXT(SDNode *Root, const NodeExtensionHelper &LHS, /// can be used to apply the pattern. 
static std::optional<CombineResult> canFoldToVW_SU(SDNode *Root, const NodeExtensionHelper &LHS, - const NodeExtensionHelper &RHS) { + const NodeExtensionHelper &RHS, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!LHS.SupportsSExt || !RHS.SupportsZExt) return std::nullopt; - if (!LHS.areVLAndMaskCompatible(Root) || !RHS.areVLAndMaskCompatible(Root)) + if (!LHS.areVLAndMaskCompatible(Root, DAG, Subtarget) || + !RHS.areVLAndMaskCompatible(Root, DAG, Subtarget)) return std::nullopt; return CombineResult(NodeExtensionHelper::getSUOpcode(Root->getOpcode()), Root, LHS, /*SExtLHS=*/true, RHS, /*SExtRHS=*/false); @@ -13330,6 +13559,8 @@ SmallVector<NodeExtensionHelper::CombineToTry> NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { SmallVector<CombineToTry> Strategies; switch (Root->getOpcode()) { + case ISD::ADD: + case ISD::SUB: case RISCVISD::ADD_VL: case RISCVISD::SUB_VL: // add|sub -> vwadd(u)|vwsub(u) @@ -13337,6 +13568,7 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { // add|sub -> vwadd(u)_w|vwsub(u)_w Strategies.push_back(canFoldToVW_W); break; + case ISD::MUL: case RISCVISD::MUL_VL: // mul -> vwmul(u) Strategies.push_back(canFoldToVWWithSameExtension); @@ -13367,12 +13599,14 @@ NodeExtensionHelper::getSupportedFoldings(const SDNode *Root) { /// mul_vl -> vwmul(u) | vwmul_su /// vwadd_w(u) -> vwadd(u) /// vwub_w(u) -> vwadd(u) -static SDValue -combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { +static SDValue combineBinOp_VLToVWBinOp_VL(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; - assert(NodeExtensionHelper::isSupportedRoot(N) && - "Shouldn't have called this method"); + if (!NodeExtensionHelper::isSupportedRoot(N, DAG)) + return SDValue(); + SmallVector<SDNode *> Worklist; SmallSet<SDNode *, 8> Inserted; Worklist.push_back(N); @@ -13381,11 +13615,11 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { while (!Worklist.empty()) { SDNode *Root = Worklist.pop_back_val(); - if (!NodeExtensionHelper::isSupportedRoot(Root)) + if (!NodeExtensionHelper::isSupportedRoot(Root, DAG)) return SDValue(); - NodeExtensionHelper LHS(N, 0, DAG); - NodeExtensionHelper RHS(N, 1, DAG); + NodeExtensionHelper LHS(N, 0, DAG, Subtarget); + NodeExtensionHelper RHS(N, 1, DAG, Subtarget); auto AppendUsersIfNeeded = [&Worklist, &Inserted](const NodeExtensionHelper &Op) { if (Op.needToPromoteOtherUsers()) { @@ -13412,7 +13646,8 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { for (NodeExtensionHelper::CombineToTry FoldingStrategy : FoldingStrategies) { - std::optional<CombineResult> Res = FoldingStrategy(N, LHS, RHS); + std::optional<CombineResult> Res = + FoldingStrategy(N, LHS, RHS, DAG, Subtarget); if (Res) { Matched = true; CombinesToApply.push_back(*Res); @@ -13441,7 +13676,7 @@ combineBinOp_VLToVWBinOp_VL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SmallVector<std::pair<SDValue, SDValue>> ValuesToReplace; ValuesToReplace.reserve(CombinesToApply.size()); for (CombineResult Res : CombinesToApply) { - SDValue NewValue = Res.materialize(DAG); + SDValue NewValue = Res.materialize(DAG, Subtarget); if (!InputRootReplacement) { assert(Res.Root == N && "First element is expected to be the current node"); @@ -14713,13 +14948,20 @@ static SDValue performCONCAT_VECTORSCombine(SDNode *N, SelectionDAG &DAG, static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { - 
assert(N->getOpcode() == RISCVISD::ADD_VL); + + assert(N->getOpcode() == RISCVISD::ADD_VL || N->getOpcode() == ISD::ADD); + + if (N->getValueType(0).isFixedLengthVector()) + return SDValue(); + SDValue Addend = N->getOperand(0); SDValue MulOp = N->getOperand(1); - SDValue AddMergeOp = N->getOperand(2); - if (!AddMergeOp.isUndef()) - return SDValue(); + if (N->getOpcode() == RISCVISD::ADD_VL) { + SDValue AddMergeOp = N->getOperand(2); + if (!AddMergeOp.isUndef()) + return SDValue(); + } auto IsVWMulOpc = [](unsigned Opc) { switch (Opc) { @@ -14743,8 +14985,16 @@ static SDValue combineToVWMACC(SDNode *N, SelectionDAG &DAG, if (!MulMergeOp.isUndef()) return SDValue(); - SDValue AddMask = N->getOperand(3); - SDValue AddVL = N->getOperand(4); + auto [AddMask, AddVL] = [](SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (N->getOpcode() == ISD::ADD) { + SDLoc DL(N); + return getDefaultScalableVLOps(N->getSimpleValueType(0), DL, DAG, + Subtarget); + } + return std::make_pair(N->getOperand(3), N->getOperand(4)); + }(N, DAG, Subtarget); + SDValue MulMask = MulOp.getOperand(3); SDValue MulVL = MulOp.getOperand(4); @@ -15010,10 +15260,18 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getNode(ISD::AND, DL, VT, NewFMV, DAG.getConstant(~SignBit, DL, VT)); } - case ISD::ADD: + case ISD::ADD: { + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; + if (SDValue V = combineToVWMACC(N, DAG, Subtarget)) + return V; return performADDCombine(N, DAG, Subtarget); - case ISD::SUB: + } + case ISD::SUB: { + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; return performSUBCombine(N, DAG, Subtarget); + } case ISD::AND: return performANDCombine(N, DCI, Subtarget); case ISD::OR: @@ -15021,6 +15279,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case ISD::XOR: return performXORCombine(N, DAG, Subtarget); case ISD::MUL: + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) + return V; return performMULCombine(N, DAG); case ISD::FADD: case ISD::UMAX: @@ -15497,7 +15757,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } case RISCVISD::ADD_VL: - if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI)) + if (SDValue V = combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget)) return V; return combineToVWMACC(N, DAG, Subtarget); case RISCVISD::SUB_VL: @@ -15506,7 +15766,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, case RISCVISD::VWSUB_W_VL: case RISCVISD::VWSUBU_W_VL: case RISCVISD::MUL_VL: - return combineBinOp_VLToVWBinOp_VL(N, DCI); + return combineBinOp_VLToVWBinOp_VL(N, DCI, Subtarget); case RISCVISD::VFMADD_VL: case RISCVISD::VFNMADD_VL: case RISCVISD::VFMSUB_VL: @@ -15729,6 +15989,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } + case RISCVISD::VMV_X_S: { + SDValue Vec = N->getOperand(0); + MVT VecVT = N->getOperand(0).getSimpleValueType(); + const MVT M1VT = getLMUL1VT(VecVT); + if (M1VT.bitsLT(VecVT)) { + Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, M1VT, Vec, + DAG.getVectorIdxConstant(0, DL)); + return DAG.getNode(RISCVISD::VMV_X_S, DL, N->getSimpleValueType(0), Vec); + } + break; + } case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_WO_CHAIN: { @@ -17047,12 +17318,39 @@ static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4, RISCV::V20M4}; static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8}; -ArrayRef<MCPhysReg> RISCV::getArgGPRs() { - static const MCPhysReg ArgGPRs[] = 
{RISCV::X10, RISCV::X11, RISCV::X12, - RISCV::X13, RISCV::X14, RISCV::X15, - RISCV::X16, RISCV::X17}; +ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) { + // The GPRs used for passing arguments in the ILP32* and LP64* ABIs, except + // the ILP32E ABI. + static const MCPhysReg ArgIGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X16, RISCV::X17}; + // The GPRs used for passing arguments in the ILP32E/ILP64E ABI. + static const MCPhysReg ArgEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15}; + + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(ArgEGPRs); - return ArrayRef(ArgGPRs); + return ArrayRef(ArgIGPRs); +} + +static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) { + // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used + // for save-restore libcall, so we don't use them. + static const MCPhysReg FastCCIGPRs[] = { + RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, + RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, + RISCV::X29, RISCV::X30, RISCV::X31}; + + // The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E. + static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12, + RISCV::X13, RISCV::X14, RISCV::X15, + RISCV::X7}; + + if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E) + return ArrayRef(FastCCEGPRs); + + return ArrayRef(FastCCIGPRs); } // Pass a 2*XLEN argument that has been split into two XLEN values through @@ -17060,17 +17358,23 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs() { static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1, ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2, MVT ValVT2, MVT LocVT2, - ISD::ArgFlagsTy ArgFlags2) { + ISD::ArgFlagsTy ArgFlags2, bool EABI) { unsigned XLenInBytes = XLen / 8; - ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(); + const RISCVSubtarget &STI = + State.getMachineFunction().getSubtarget<RISCVSubtarget>(); + ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(STI.getTargetABI()); + if (Register Reg = State.AllocateReg(ArgGPRs)) { // At least one half can be passed via register. State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg, VA1.getLocVT(), CCValAssign::Full)); } else { // Both halves must be passed on the stack, with proper alignment. - Align StackAlign = - std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign()); + // TODO: To be compatible with GCC's behaviors, we force them to have 4-byte + // alignment. This behavior may be changed when RV32E/ILP32E is ratified. 
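A small sketch of the stack-alignment rule stated in the TODO above, with hypothetical helper and parameter names: a 2*XLEN argument split onto the stack keeps 4-byte alignment on ILP32E (matching GCC), and is otherwise raised to the argument's natural alignment.

```cpp
#include <algorithm>
#include <cstdio>

// Hypothetical restatement of the ILP32E stack-alignment decision for a
// split 2*XLEN argument; not the commit's code.
static unsigned stackAlignFor2XLen(unsigned XLen, bool EABI,
                                   unsigned OrigAlign) {
  unsigned AlignBytes = XLen / 8;                 // one XLEN slot, in bytes
  if (!EABI || XLen != 32)
    AlignBytes = std::max(AlignBytes, OrigAlign); // honour natural alignment
  return AlignBytes;
}

int main() {
  std::printf("ILP32:  %u-byte slot alignment\n", stackAlignFor2XLen(32, false, 8));
  std::printf("ILP32E: %u-byte slot alignment\n", stackAlignFor2XLen(32, true, 8));
  return 0;
}
```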
+ Align StackAlign(XLenInBytes); + if (!EABI || XLen != 32) + StackAlign = std::max(StackAlign, ArgFlags1.getNonZeroOrigAlign()); State.addLoc( CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(), State.AllocateStack(XLenInBytes, StackAlign), @@ -17151,7 +17455,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, default: llvm_unreachable("Unexpected ABI"); case RISCVABI::ABI_ILP32: + case RISCVABI::ABI_ILP32E: case RISCVABI::ABI_LP64: + case RISCVABI::ABI_LP64E: break; case RISCVABI::ABI_ILP32F: case RISCVABI::ABI_LP64F: @@ -17183,7 +17489,7 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, LocInfo = CCValAssign::BCvt; } - ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(); + ArrayRef<MCPhysReg> ArgGPRs = RISCV::getArgGPRs(ABI); // If this is a variadic argument, the RISC-V calling convention requires // that it is assigned an 'even' or 'aligned' register if it has 8-byte @@ -17192,9 +17498,13 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, // legalisation or not. The argument will not be passed by registers if the // original type is larger than 2*XLEN, so the register alignment rule does // not apply. + // TODO: To be compatible with GCC's behaviors, we don't align registers + // currently if we are using ILP32E calling convention. This behavior may be + // changed when RV32E/ILP32E is ratified. unsigned TwoXLenInBytes = (2 * XLen) / 8; if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes && - DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) { + DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes && + ABI != RISCVABI::ABI_ILP32E) { unsigned RegIdx = State.getFirstUnallocated(ArgGPRs); // Skip 'odd' register if necessary. if (RegIdx != std::size(ArgGPRs) && RegIdx % 2 == 1) @@ -17267,8 +17577,9 @@ bool RISCV::CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo, ISD::ArgFlagsTy AF = PendingArgFlags[0]; PendingLocs.clear(); PendingArgFlags.clear(); - return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT, - ArgFlags); + return CC_RISCVAssign2XLen( + XLen, State, VA, AF, ValNo, ValVT, LocVT, ArgFlags, + ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E); } // Allocate to a register if possible, or else a stack slot. @@ -17594,15 +17905,8 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, bool IsFixed, bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI, std::optional<unsigned> FirstMaskArgument) { - - // X5 and X6 might be used for save-restore libcall. 
- static const MCPhysReg GPRList[] = { - RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, - RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28, - RISCV::X29, RISCV::X30, RISCV::X31}; - if (LocVT == MVT::i32 || LocVT == MVT::i64) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -17653,7 +17957,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) || (LocVT == MVT::f64 && Subtarget.is64Bit() && Subtarget.hasStdExtZdinx())) { - if (unsigned Reg = State.AllocateReg(GPRList)) { + if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) { State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); return false; } @@ -17687,7 +17991,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI, State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); } else { // Try and pass the address via a "fast" GPR. - if (unsigned GPRReg = State.AllocateReg(GPRList)) { + if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) { LocInfo = CCValAssign::Indirect; LocVT = TLI.getSubtarget().getXLenVT(); State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo)); @@ -17788,6 +18092,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments( case CallingConv::GRAAL: break; case CallingConv::GHC: + if (Subtarget.isRVE()) + report_fatal_error("GHC calling convention is not supported on RVE!"); if (!Subtarget.hasStdExtFOrZfinx() || !Subtarget.hasStdExtDOrZdinx()) report_fatal_error("GHC calling convention requires the (Zfinx/F) and " "(Zdinx/D) instruction set extensions"); @@ -17870,7 +18176,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments( MF.getInfo<RISCVMachineFunctionInfo>()->setIsVectorCall(); if (IsVarArg) { - ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(); + ArrayRef<MCPhysReg> ArgRegs = RISCV::getArgGPRs(Subtarget.getTargetABI()); unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs); const TargetRegisterClass *RC = &RISCV::GPRRegClass; MachineFrameInfo &MFI = MF.getFrameInfo(); @@ -18023,9 +18329,11 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVector<CCValAssign, 16> ArgLocs; CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - if (CallConv == CallingConv::GHC) + if (CallConv == CallingConv::GHC) { + if (Subtarget.isRVE()) + report_fatal_error("GHC calling convention is not supported on RVE!"); ArgCCInfo.AnalyzeCallOperands(Outs, RISCV::CC_RISCV_GHC); - else + } else analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI, CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV); |
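Finally, a standalone sketch of the variadic register-alignment rule the diff threads through CC_RISCV: a 2*XLEN variadic argument normally starts in an even ("aligned") argument register, but under ILP32E the odd register is not skipped, matching current GCC behaviour. The helper name is hypothetical and the size/alignment preconditions are omitted for brevity.

```cpp
#include <cstdio>

// Hypothetical sketch of the even-register rule for 2*XLEN varargs.
static unsigned firstRegFor2XLenVararg(unsigned FirstUnallocated, bool ILP32E) {
  unsigned RegIdx = FirstUnallocated;   // index into the a-register list
  if (!ILP32E && RegIdx % 2 == 1)
    ++RegIdx;                           // skip the odd register
  return RegIdx;
}

int main() {
  // Next free register is a1 (index 1): an i64 vararg starts at a2 on ILP32
  // but stays at a1 on ILP32E.
  std::printf("ILP32:  arg reg index %u\n", firstRegFor2XLenVararg(1, false));
  std::printf("ILP32E: arg reg index %u\n", firstRegFor2XLenVararg(1, true));
  return 0;
}
```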