aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
commit145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
parentecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp')
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp970
1 files changed, 683 insertions, 287 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 6f77428ae721..cfaafc7b53d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -37,6 +37,7 @@ namespace RISCV {
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
+#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
} // namespace llvm
@@ -47,17 +48,36 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
I != E;) {
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+ // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
+ // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ MVT VT = N->getSimpleValueType(0);
+ unsigned Opc =
+ VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
+ SDLoc DL(N);
+ SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
+ SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
+ N->getOperand(0), VL);
+
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+
// Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
// load. Done after lowering and combining so that we have a chance to
// optimize this to VMV_V_X_VL when the upper bits aren't needed.
if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
continue;
- assert(N->getNumOperands() == 3 && "Unexpected number of operands");
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands");
MVT VT = N->getSimpleValueType(0);
- SDValue Lo = N->getOperand(0);
- SDValue Hi = N->getOperand(1);
- SDValue VL = N->getOperand(2);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Lo = N->getOperand(1);
+ SDValue Hi = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
"Unexpected VTs!");
@@ -88,7 +108,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
SDValue Ops[] = {Chain,
IntID,
- CurDAG->getUNDEF(VT),
+ Passthru,
StackSlot,
CurDAG->getRegister(RISCV::X0, MVT::i64),
VL};
@@ -112,6 +132,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
+ HandleSDNode Dummy(CurDAG->getRoot());
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
bool MadeChange = false;
@@ -123,57 +144,70 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
MadeChange |= doPeepholeSExtW(N);
MadeChange |= doPeepholeLoadStoreADDI(N);
+ MadeChange |= doPeepholeMaskedRVV(N);
}
+ CurDAG->setRoot(Dummy.getValue());
+
if (MadeChange)
CurDAG->RemoveDeadNodes();
}
-static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
- const MVT VT, int64_t Imm,
- const RISCVSubtarget &Subtarget) {
- assert(VT == MVT::i64 && "Expecting MVT::i64");
- const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
- ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
- SDValue Addr = TLI->getAddr(CP, *CurDAG);
- SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
- // Since there is no data race, the chain can be the entry node.
- SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
- CurDAG->getEntryNode());
- MachineFunction &MF = CurDAG->getMachineFunction();
- MachineMemOperand *MemOp = MF.getMachineMemOperand(
- MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- LLT(VT), CP->getAlign());
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
- return Load;
-}
-
-static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
- int64_t Imm, const RISCVSubtarget &Subtarget) {
- MVT XLenVT = Subtarget.getXLenVT();
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+// Returns true if N is a MachineSDNode that has a reg and simm12 memory
+// operand. The indices of the base pointer and offset are returned in BaseOpIdx
+// and OffsetOpIdx.
+static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
+ unsigned &OffsetOpIdx) {
+ switch (N->getMachineOpcode()) {
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ BaseOpIdx = 0;
+ OffsetOpIdx = 1;
+ return true;
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ BaseOpIdx = 1;
+ OffsetOpIdx = 2;
+ return true;
+ }
- // If Imm is expensive to build, then we put it into constant pool.
- if (Subtarget.useConstantPoolForLargeInts() &&
- Seq.size() > Subtarget.getMaxBuildIntsCost())
- return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
+ return false;
+}
+static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+ RISCVMatInt::InstSeq &Seq) {
SDNode *Result = nullptr;
- SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
+ SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
for (RISCVMatInt::Inst &Inst : Seq) {
- SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
- if (Inst.Opc == RISCV::LUI)
- Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
- else if (Inst.Opc == RISCV::ADD_UW)
- Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg,
- CurDAG->getRegister(RISCV::X0, XLenVT));
- else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
- Inst.Opc == RISCV::SH3ADD)
- Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
- else
- Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
+ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT);
+ switch (Inst.getOpndKind()) {
+ case RISCVMatInt::Imm:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm);
+ break;
+ case RISCVMatInt::RegX0:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg,
+ CurDAG->getRegister(RISCV::X0, VT));
+ break;
+ case RISCVMatInt::RegReg:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg);
+ break;
+ case RISCVMatInt::RegImm:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm);
+ break;
+ }
// Only the first instruction has X0 as its source.
SrcReg = SDValue(Result, 0);
@@ -182,51 +216,28 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
return Result;
}
-static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned RegClassID, unsigned SubReg0) {
- assert(Regs.size() >= 2 && Regs.size() <= 8);
-
- SDLoc DL(Regs[0]);
- SmallVector<SDValue, 8> Ops;
-
- Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+ int64_t Imm, const RISCVSubtarget &Subtarget) {
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
- for (unsigned I = 0; I < Regs.size(); ++I) {
- Ops.push_back(Regs[I]);
- Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
- }
- SDNode *N =
- CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
- return SDValue(N, 0);
+ return selectImmSeq(CurDAG, DL, VT, Seq);
}
-static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- static const unsigned RegClassIDs[] = {
+static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
+ unsigned NF, RISCVII::VLMUL LMUL) {
+ static const unsigned M1TupleRegClassIDs[] = {
RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
RISCV::VRN8M1RegClassID};
+ static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
+ RISCV::VRN3M2RegClassID,
+ RISCV::VRN4M2RegClassID};
- return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
-}
-
-static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
- RISCV::VRN3M2RegClassID,
- RISCV::VRN4M2RegClassID};
-
- return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
-}
-
-static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
- RISCV::sub_vrm4_0);
-}
+ assert(Regs.size() >= 2 && Regs.size() <= 8);
-static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF, RISCVII::VLMUL LMUL) {
+ unsigned RegClassID;
+ unsigned SubReg0;
switch (LMUL) {
default:
llvm_unreachable("Invalid LMUL.");
@@ -234,12 +245,37 @@ static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
case RISCVII::VLMUL::LMUL_F4:
case RISCVII::VLMUL::LMUL_F2:
case RISCVII::VLMUL::LMUL_1:
- return createM1Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm1_0;
+ RegClassID = M1TupleRegClassIDs[NF - 2];
+ break;
case RISCVII::VLMUL::LMUL_2:
- return createM2Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm2_0;
+ RegClassID = M2TupleRegClassIDs[NF - 2];
+ break;
case RISCVII::VLMUL::LMUL_4:
- return createM4Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm4_0;
+ RegClassID = RISCV::VRN2M4RegClassID;
+ break;
+ }
+
+ SDLoc DL(Regs[0]);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
+
+ for (unsigned I = 0; I < Regs.size(); ++I) {
+ Ops.push_back(Regs[I]);
+ Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
}
+ SDNode *N =
+ CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
+ return SDValue(N, 0);
}
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
@@ -287,6 +323,10 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
Operands.push_back(Glue);
}
+static bool isAllUndef(ArrayRef<SDValue> Values) {
+ return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
+}
+
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
bool IsStrided) {
SDLoc DL(Node);
@@ -297,19 +337,21 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
unsigned CurOp = 2;
SmallVector<SDValue, 8> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
- SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
- Operands.push_back(MaskedOff);
- CurOp += NF;
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
+ SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
+ Operands.push_back(Merge);
}
+ CurOp += NF;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
Operands, /*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
- RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
+ RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
@@ -338,25 +380,25 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
unsigned CurOp = 2;
SmallVector<SDValue, 7> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
Operands.push_back(MaskedOff);
- CurOp += NF;
}
+ CurOp += NF;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ false, Operands,
/*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
- RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
+ RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
- MVT::Other, MVT::Glue, Operands);
- SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
- /*Glue*/ SDValue(Load, 2));
+ XLenVT, MVT::Other, Operands);
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
@@ -368,8 +410,8 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
}
- ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL
- ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
+ ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
CurDAG->RemoveDeadNode(Node);
}
@@ -383,13 +425,15 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
unsigned CurOp = 2;
SmallVector<SDValue, 8> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
Operands.push_back(MaskedOff);
- CurOp += NF;
}
+ CurOp += NF;
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
@@ -406,7 +450,7 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
"values when XLEN=32");
}
const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
- NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
static_cast<unsigned>(IndexLMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
@@ -596,32 +640,125 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
int64_t Imm = ConstNode->getSExtValue();
// If the upper XLen-16 bits are not used, try to convert this to a simm12
// by sign extending bit 15.
- if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) &&
+ if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
hasAllHUsers(Node))
- Imm = SignExtend64(Imm, 16);
+ Imm = SignExtend64<16>(Imm);
// If the upper 32-bits are not used try to convert this into a simm32 by
// sign extending bit 32.
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
- Imm = SignExtend64(Imm, 32);
+ Imm = SignExtend64<32>(Imm);
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
return;
}
- case ISD::FrameIndex: {
- SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
- int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
- ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
+ case ISD::ADD: {
+ // Try to select ADD + immediate used as memory addresses to
+ // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by
+ // doPeepholeLoadStoreADDI.
+
+ // LHS should be an immediate.
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+
+ int64_t Offset = N1C->getSExtValue();
+ int64_t Lo12 = SignExtend64<12>(Offset);
+
+ // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
+ if (Lo12 == 0 || isInt<12>(Offset))
+ break;
+
+ // Don't do this if we can use a pair of ADDIs.
+ if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
+ break;
+
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits());
+
+ Offset -= Lo12;
+ // Restore sign bits for RV32.
+ if (!Subtarget->is64Bit())
+ Offset = SignExtend64<32>(Offset);
+
+ // We can fold if the last operation is an ADDI or its an ADDIW that could
+ // be treated as an ADDI.
+ if (Seq.back().Opc != RISCV::ADDI &&
+ !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset)))
+ break;
+ assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12");
+ // Drop the last operation.
+ Seq.pop_back();
+ assert(!Seq.empty() && "Expected more instructions in sequence");
+
+ bool AllPointerUses = true;
+ for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ // Is this user a memory instruction that uses a register and immediate
+ // that has this ADD as its pointer.
+ unsigned BaseOpIdx, OffsetOpIdx;
+ if (!User->isMachineOpcode() ||
+ !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
+ UI.getOperandNo() != BaseOpIdx) {
+ AllPointerUses = false;
+ break;
+ }
+
+ // If the memory instruction already has an offset, make sure the combined
+ // offset is foldable.
+ int64_t MemOffs =
+ cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
+ MemOffs += Lo12;
+ if (!isInt<12>(MemOffs)) {
+ AllPointerUses = false;
+ break;
+ }
+ }
+
+ if (!AllPointerUses)
+ break;
+
+ // Emit (ADDI (ADD X, Hi), Lo)
+ SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq);
+ SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
+ Node->getOperand(0), SDValue(Imm, 0));
+ SDNode *ADDI =
+ CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
+ CurDAG->getTargetConstant(Lo12, DL, VT));
+ ReplaceNode(Node, ADDI);
return;
}
+ case ISD::SHL: {
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+ SDValue N0 = Node->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N0.getOperand(1)))
+ break;
+ unsigned ShAmt = N1C->getZExtValue();
+ uint64_t Mask = N0.getConstantOperandVal(1);
+
+ // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
+ // 32 leading zeros and C3 trailing zeros.
+ if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
+ unsigned XLen = Subtarget->getXLen();
+ unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ if (TrailingZeros > 0 && LeadingZeros == 32) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+ break;
+ }
case ISD::SRL: {
- // Optimize (srl (and X, C2), C) ->
- // (srli (slli X, (XLen-C3), (XLen-C3) + C)
- // Where C2 is a mask with C3 trailing ones.
- // Taking into account that the C2 may have had lower bits unset by
- // SimplifyDemandedBits. This avoids materializing the C2 immediate.
- // This pattern occurs when type legalizing right shifts for types with
- // less than XLen bits.
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
break;
@@ -631,6 +768,32 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
unsigned ShAmt = N1C->getZExtValue();
uint64_t Mask = N0.getConstantOperandVal(1);
+
+ // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
+ // 32 leading zeros and C3 trailing zeros.
+ if (isShiftedMask_64(Mask)) {
+ unsigned XLen = Subtarget->getXLen();
+ unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+
+ // Optimize (srl (and X, C2), C) ->
+ // (srli (slli X, (XLen-C3), (XLen-C3) + C)
+ // Where C2 is a mask with C3 trailing ones.
+ // Taking into account that the C2 may have had lower bits unset by
+ // SimplifyDemandedBits. This avoids materializing the C2 immediate.
+ // This pattern occurs when type legalizing right shifts for types with
+ // less than XLen bits.
Mask |= maskTrailingOnes<uint64_t>(ShAmt);
if (!isMask_64(Mask))
break;
@@ -700,13 +863,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
uint64_t C1 = N1C->getZExtValue();
- // Keep track of whether this is a andi, zext.h, or zext.w.
- bool ZExtOrANDI = isInt<12>(N1C->getSExtValue());
- if (C1 == UINT64_C(0xFFFF) &&
- (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()))
- ZExtOrANDI = true;
- if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())
- ZExtOrANDI = true;
+ // Keep track of whether this is a c.andi. If we can't use c.andi, the
+ // shift pair might offer more compression opportunities.
+ // TODO: We could check for C extension here, but we don't have many lit
+ // tests with the C extension enabled so not checking gets better coverage.
+ // TODO: What if ANDI faster than shift?
+ bool IsCANDI = isInt<6>(N1C->getSExtValue());
// Clear irrelevant bits in the mask.
if (LeftShift)
@@ -727,9 +889,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (C2 < C3) {
// If the number of leading zeros is C2+32 this can be SRLIW.
if (C2 + 32 == C3) {
- SDNode *SRLIW =
- CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2, DL, XLenVT));
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SRLIW);
return;
}
@@ -739,27 +900,33 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
//
// This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
// legalized and goes through DAG combine.
- SDValue Y;
if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
- selectSExti32(X, Y)) {
+ X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
SDNode *SRAIW =
- CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y,
- CurDAG->getTargetConstant(31, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
+ CurDAG->getTargetConstant(31, DL, VT));
SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
- CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
+ RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
+ CurDAG->getTargetConstant(C3 - 32, DL, VT));
ReplaceNode(Node, SRLIW);
return;
}
// (srli (slli x, c3-c2), c3).
- if (OneUseOrZExtW && !ZExtOrANDI) {
+ // Skip if we could use (zext.w (sraiw X, C2)).
+ bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
+ X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
+ // Also Skip if we can use bexti.
+ Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
+ if (OneUseOrZExtW && !Skip) {
SDNode *SLLI = CurDAG->getMachineNode(
- RISCV::SLLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ RISCV::SLLI, DL, VT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -775,21 +942,20 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
// Use slli.uw when possible.
if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
- SDNode *SLLI_UW =
- CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2, DL, XLenVT));
+ SDNode *SLLI_UW = CurDAG->getMachineNode(
+ RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SLLI_UW);
return;
}
// (srli (slli c2+c3), c3)
- if (OneUseOrZExtW && !ZExtOrANDI) {
+ if (OneUseOrZExtW && !IsCANDI) {
SDNode *SLLI = CurDAG->getMachineNode(
- RISCV::SLLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ RISCV::SLLI, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -801,25 +967,31 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!LeftShift && isShiftedMask_64(C1)) {
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
uint64_t C3 = countTrailingZeros(C1);
- if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) {
+ if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
+ unsigned SrliOpc = RISCV::SRLI;
+ // If the input is zexti32 we should use SRLIW.
+ if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
+ X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
+ SrliOpc = RISCV::SRLIW;
+ X = X.getOperand(0);
+ }
SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
- OneUseOrZExtW && !ZExtOrANDI) {
- SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ OneUseOrZExtW && !IsCANDI) {
+ SDNode *SRLIW =
+ CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
@@ -830,24 +1002,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (LeftShift && isShiftedMask_64(C1)) {
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
uint64_t C3 = countTrailingZeros(C1);
- if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) {
+ if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
- if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
- SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
+ SDNode *SRLIW =
+ CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
@@ -908,7 +1079,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
uint64_t ShiftedC1 = C1 << ConstantShift;
// If this RV32, we need to sign extend the constant.
if (XLen == 32)
- ShiftedC1 = SignExtend64(ShiftedC1, 32);
+ ShiftedC1 = SignExtend64<32>(ShiftedC1);
// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
@@ -1005,45 +1176,44 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
MVT Src1VT = Src1.getSimpleValueType();
unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
- VMSetOpcode, VMANDOpcode;
+ VMOROpcode;
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
-#define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
+#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
case RISCVII::VLMUL::lmulenum: \
VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
: RISCV::PseudoVMSLT_VX_##suffix; \
VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
: RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
- VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
break;
- CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1)
- CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2)
- CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4)
- CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8)
- CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16)
- CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32)
- CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64)
-#undef CASE_VMSLT_VMSET_OPCODES
+ CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
+ CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
+ CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
+ CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
+ CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
+ CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
+ CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
+#undef CASE_VMSLT_OPCODES
}
// Mask operations use the LMUL from the mask type.
switch (RISCVTargetLowering::getLMUL(VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
-#define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \
+#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
case RISCVII::VLMUL::lmulenum: \
VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
- VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \
+ VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
break;
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8)
-#undef CASE_VMXOR_VMANDN_VMAND_OPCODES
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
+#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
}
SDValue SEW = CurDAG->getTargetConstant(
Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
@@ -1053,12 +1223,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue MaskedOff = Node->getOperand(1);
SDValue Mask = Node->getOperand(4);
- // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}.
+ // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
if (IsCmpUnsignedZero) {
- SDValue VMSet =
- SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0);
- ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT,
- {Mask, VMSet, VL, MaskSEW}));
+ // We don't need vmor if the MaskedOff and the Mask are the same
+ // value.
+ if (Mask == MaskedOff) {
+ ReplaceUses(Node, Mask.getNode());
+ return;
+ }
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(VMOROpcode, DL, VT,
+ {Mask, MaskedOff, VL, MaskSEW}));
return;
}
@@ -1082,10 +1257,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Otherwise use
// vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
+ // The result is mask undisturbed.
+ // We use the same instructions to emulate mask agnostic behavior, because
+ // the agnostic result can be either undisturbed or all 1.
SDValue Cmp = SDValue(
CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
{MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
0);
+ // vmxor.mm vd, vd, v0 is used to update active value.
ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
{Cmp, Mask, VL, MaskSEW}));
return;
@@ -1215,7 +1394,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned CurOp = 2;
// Masked intrinsic only have TU version pseduo instructions.
- bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
+ bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
SmallVector<SDValue, 8> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1267,9 +1446,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// The riscv_vlm intrinsic are always tail agnostic and no passthru operand.
bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
// Masked intrinsic only have TU version pseduo instructions.
- bool IsTU =
- HasPassthruOperand &&
- ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
+ bool IsTU = HasPassthruOperand &&
+ (IsMasked || !Node->getOperand(CurOp).isUndef());
SmallVector<SDValue, 8> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1302,7 +1480,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned CurOp = 2;
// Masked intrinsic only have TU version pseduo instructions.
- bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
+ bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
SmallVector<SDValue, 7> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1318,19 +1496,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
const RISCV::VLEPseudo *P =
RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
Log2SEW, static_cast<unsigned>(LMUL));
- MachineSDNode *Load =
- CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
- MVT::Other, MVT::Glue, Operands);
- SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
- /*Glue*/ SDValue(Load, 2));
-
+ MachineSDNode *Load = CurDAG->getMachineNode(
+ P->Pseudo, DL, Node->getVTList(), Operands);
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
- ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
- ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL
- ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain
- CurDAG->RemoveDeadNode(Node);
+ ReplaceNode(Node, Load);
return;
}
}
@@ -1610,9 +1781,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Try to match splat of a scalar load to a strided load with stride of x0.
bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
- if (IsScalarMove && !Node->getOperand(0).isUndef())
+ bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR;
+ if (HasPassthruOperand && !Node->getOperand(0).isUndef())
break;
- SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0);
+ SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0);
auto *Ld = dyn_cast<LoadSDNode>(Src);
if (!Ld)
break;
@@ -1634,7 +1806,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
selectVLOp(Node->getOperand(2), VL);
} else
- selectVLOp(Node->getOperand(1), VL);
+ selectVLOp(Node->getOperand(2), VL);
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
@@ -1650,8 +1822,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
- if (auto *MemOp = dyn_cast<MemSDNode>(Node))
- CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
+ CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
ReplaceNode(Node, Load);
return;
@@ -1680,11 +1851,37 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
return true;
}
-bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
+bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
return true;
}
+
+ return false;
+}
+
+// Select a frame index and an optional immediate offset from an ADD or OR.
+bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (!CurDAG->isBaseWithConstantOffset(Addr))
+ return false;
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
+ Subtarget->getXLenVT());
+ Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
+ Subtarget->getXLenVT());
+ return true;
+ }
+ }
+
return false;
}
@@ -1698,6 +1895,76 @@ bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
return true;
}
+bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ SDLoc DL(Addr);
+ MVT VT = Addr.getSimpleValueType();
+
+ if (Addr.getOpcode() == RISCVISD::ADD_LO) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = Addr.getOperand(0);
+ if (Base.getOpcode() == RISCVISD::ADD_LO) {
+ SDValue LoOperand = Base.getOperand(1);
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
+ // If the Lo in (ADD_LO hi, lo) is a global variable's address
+ // (its low part, really), then we can rely on the alignment of that
+ // variable to provide a margin of safety before low part can overflow
+ // the 12 bits of the load/store offset. Check if CVal falls within
+ // that margin; if so (low part + CVal) can't overflow.
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = commonAlignment(
+ GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+ if (CVal == 0 || Alignment > CVal) {
+ int64_t CombinedOffset = CVal + GA->getOffset();
+ Base = Base.getOperand(0);
+ Offset = CurDAG->getTargetGlobalAddress(
+ GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
+ CombinedOffset, GA->getTargetFlags());
+ return true;
+ }
+ }
+ }
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+ Offset = CurDAG->getTargetConstant(CVal, DL, VT);
+ return true;
+ }
+ }
+
+ // Handle ADD with large immediates.
+ if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ assert(!isInt<12>(CVal) && "simm12 not already handled?");
+
+ if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
+ // We can use an ADDI for part of the offset and fold the rest into the
+ // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
+ int64_t Adj = CVal < 0 ? -2048 : 2047;
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
+ CurDAG->getTargetConstant(Adj, DL, VT)),
+ 0);
+ Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
+ return true;
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+}
+
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
// Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
@@ -1723,6 +1990,21 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
ShAmt = N.getOperand(0);
return true;
}
+ } else if (N.getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(N.getOperand(0))) {
+ uint64_t Imm = N.getConstantOperandVal(0);
+ // If we are shifting by N-X where N == 0 (mod ShiftWidth), then just shift
+ // by -X to generate a NEG instead of a SUB of a constant.
+ if (Imm != 0 && Imm % ShiftWidth == 0) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
+ unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
+ MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
+ N.getOperand(1));
+ ShAmt = SDValue(Neg, 0);
+ return true;
+ }
}
ShAmt = N;
@@ -1778,6 +2060,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
Node->getOpcode() == ISD::SRL ||
Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ Node->getOpcode() == RISCVISD::GREV ||
+ Node->getOpcode() == RISCVISD::GORC ||
isa<ConstantSDNode>(Node)) &&
"Unexpected opcode");
@@ -1812,6 +2096,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_WU:
case RISCV::FCVT_S_W:
@@ -1835,6 +2120,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
return false;
break;
case RISCV::SEXT_H:
+ case RISCV::FMV_H_X:
case RISCV::ZEXT_H_RV32:
case RISCV::ZEXT_H_RV64:
if (Bits < 16)
@@ -1871,22 +2157,32 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
// allows us to choose betwen VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
auto *C = dyn_cast<ConstantSDNode>(N);
- if (C && (isUInt<5>(C->getZExtValue()) ||
- C->getSExtValue() == RISCV::VLMaxSentinel))
+ if (C && isUInt<5>(C->getZExtValue())) {
VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
N->getValueType(0));
- else
+ } else if (C && C->isAllOnesValue()) {
+ // Treat all ones as VLMax.
+ VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
+ N->getValueType(0));
+ } else if (isa<RegisterSDNode>(N) &&
+ cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
+ // All our VL operands use an operand that allows GPRNoX0 or an immediate
+ // as the register class. Convert X0 to a special immediate to pass the
+ // MachineVerifier. This is recognized specially by the vsetvli insertion
+ // pass.
+ VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
+ N->getValueType(0));
+ } else {
VL = N;
+ }
return true;
}
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
- if (N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL)
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
return false;
- SplatVal = N.getOperand(0);
+ SplatVal = N.getOperand(1);
return true;
}
@@ -1896,23 +2192,22 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
SelectionDAG &DAG,
const RISCVSubtarget &Subtarget,
ValidateFn ValidateImm) {
- if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
- !isa<ConstantSDNode>(N.getOperand(0)))
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
+ !isa<ConstantSDNode>(N.getOperand(1)))
return false;
- int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+ int64_t SplatImm =
+ cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
- // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL
- // share semantics when the operand type is wider than the resulting vector
- // element type: an implicit truncation first takes place. Therefore, perform
- // a manual truncation/sign-extension in order to ignore any truncated bits
- // and catch any zero-extended immediate.
+ // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
+ // type is wider than the resulting vector element type, an implicit
+ // truncation first takes place. Therefore, perform a manual
+ // truncation/sign-extension in order to ignore any truncated bits and catch
+ // any zero-extended immediate.
// For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
// sign-extending to (XLenVT -1).
MVT XLenVT = Subtarget.getXLenVT();
- assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
+ assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
"Unexpected splat operand type");
MVT EltVT = N.getSimpleValueType().getVectorElementType();
if (EltVT.bitsLT(XLenVT))
@@ -1945,13 +2240,12 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
}
bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
- if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
- !isa<ConstantSDNode>(N.getOperand(0)))
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
+ !isa<ConstantSDNode>(N.getOperand(1)))
return false;
- int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+ int64_t SplatImm =
+ cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
if (!isUInt<5>(SplatImm))
return false;
@@ -1980,49 +2274,42 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
+// (load (add base, (addi src, off1)), off2)
+// -> (load (add base, src), off1+off2)
+// (store val, (add base, (addi src, off1)), off2)
+// -> (store val, (add base, src), off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
- int OffsetOpIdx;
- int BaseOpIdx;
-
- // Only attempt this optimisation for I-type loads and S-type stores.
- switch (N->getMachineOpcode()) {
- default:
+ unsigned OffsetOpIdx, BaseOpIdx;
+ if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
return false;
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- BaseOpIdx = 0;
- OffsetOpIdx = 1;
- break;
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD:
- BaseOpIdx = 1;
- OffsetOpIdx = 2;
- break;
- }
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
return false;
SDValue Base = N->getOperand(BaseOpIdx);
- // If the base is an ADDI, we can merge it in to the load/store.
- if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
+ if (!Base.isMachineOpcode())
return false;
+ if (Base.getMachineOpcode() == RISCV::ADDI) {
+ // If the base is an ADDI, we can merge it in to the load/store.
+ } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
+ isa<ConstantSDNode>(Base.getOperand(1)) &&
+ Base.getOperand(0).isMachineOpcode() &&
+ Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
+ isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
+ // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
+ // and LUI+ADDI would have produced the same result. This is true for all
+ // simm32 values except 0x7ffff800-0x7fffffff.
+ int64_t Offset =
+ SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
+ Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
+ if (!isInt<32>(Offset))
+ return false;
+ } else
+ return false;
+
SDValue ImmOperand = Base.getOperand(1);
uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
@@ -2039,7 +2326,8 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
// to provide a margin of safety before off1 can overflow the 12 bits.
// Check if off2 falls within that margin; if so off1+off2 can't overflow.
const DataLayout &DL = CurDAG->getDataLayout();
- Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
+ Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL),
+ GA->getOffset());
if (Offset2 != 0 && Alignment <= Offset2)
return false;
int64_t Offset1 = GA->getOffset();
@@ -2049,7 +2337,7 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
CombinedOffset, GA->getTargetFlags());
} else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
// Ditto.
- Align Alignment = CP->getAlign();
+ Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
if (Offset2 != 0 && Alignment <= Offset2)
return false;
int64_t Offset1 = CP->getOffset();
@@ -2068,12 +2356,13 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
LLVM_DEBUG(dbgs() << "\n");
// Modify the offset operand of the load/store.
- if (BaseOpIdx == 0) // Load
- CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
- N->getOperand(2));
- else // Store
- CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
- ImmOperand, N->getOperand(3));
+ if (BaseOpIdx == 0) { // Load
+ N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
+ N->getOperand(2));
+ } else { // Store
+ N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
+ ImmOperand, N->getOperand(3));
+ }
return true;
}
@@ -2130,6 +2419,8 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
case RISCV::SUBW:
case RISCV::MULW:
case RISCV::SLLIW:
+ case RISCV::GREVIW:
+ case RISCV::GORCIW:
// Result is already sign extended just remove the sext.w.
// NOTE: We only handle the nodes that are selected with hasAllWUsers.
ReplaceUses(N, N0.getNode());
@@ -2139,8 +2430,113 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
return false;
}
+// Optimize masked RVV pseudo instructions with a known all-ones mask to their
+// corresponding "unmasked" pseudo versions. The mask we're interested in will
+// take the form of a V0 physical register operand, with a glued
+// register-setting instruction.
+bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
+ const RISCV::RISCVMaskedPseudoInfo *I =
+ RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
+ if (!I)
+ return false;
+
+ unsigned MaskOpIdx = I->MaskOpIdx;
+
+ // Check that we're using V0 as a mask register.
+ if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
+ cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
+ return false;
+
+ // The glued user defines V0.
+ const auto *Glued = N->getGluedNode();
+
+ if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
+ return false;
+
+ // Check that we're defining V0 as a mask register.
+ if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
+ cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
+ return false;
+
+ // Check the instruction defining V0; it needs to be a VMSET pseudo.
+ SDValue MaskSetter = Glued->getOperand(2);
+
+ const auto IsVMSet = [](unsigned Opc) {
+ return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
+ Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
+ Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
+ Opc == RISCV::PseudoVMSET_M_B8;
+ };
+
+ // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
+ // undefined behaviour if it's the wrong bitwidth, so we could choose to
+ // assume that it's all-ones? Same applies to its VL.
+ if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
+ return false;
+
+ // Retrieve the tail policy operand index, if any.
+ Optional<unsigned> TailPolicyOpIdx;
+ const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
+ const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());
+
+ bool IsTA = true;
+ if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
+ // The last operand of the pseudo is the policy op, but we might have a
+ // Glue operand last. We might also have a chain.
+ TailPolicyOpIdx = N->getNumOperands() - 1;
+ if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
+ (*TailPolicyOpIdx)--;
+ if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
+ (*TailPolicyOpIdx)--;
+
+ if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
+ RISCVII::TAIL_AGNOSTIC)) {
+ // Keep the true-masked instruction when there is no unmasked TU
+ // instruction
+ if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
+ return false;
+ // We can't use TA if the tie-operand is not IMPLICIT_DEF
+ if (!N->getOperand(0).isUndef())
+ IsTA = false;
+ }
+ }
+
+ unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
+
+ // Check that we're dropping the mask operand and any policy operand
+ // when we transform to this unmasked pseudo. Additionally, if this instruction
+ // is tail agnostic, the unmasked instruction should not have a merge op.
+ uint64_t TSFlags = TII.get(Opc).TSFlags;
+ assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
+ RISCVII::hasDummyMaskOp(TSFlags) &&
+ !RISCVII::hasVecPolicyOp(TSFlags) &&
+ "Unexpected pseudo to transform to");
+ (void)TSFlags;
+
+ SmallVector<SDValue, 8> Ops;
+ // Skip the merge operand at index 0 if IsTA
+ for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
+ // Skip the mask, the policy, and the Glue.
+ SDValue Op = N->getOperand(I);
+ if (I == MaskOpIdx || I == TailPolicyOpIdx ||
+ Op.getValueType() == MVT::Glue)
+ continue;
+ Ops.push_back(Op);
+ }
+
+ // Transitively apply any node glued to our new node.
+ if (auto *TGlued = Glued->getGluedNode())
+ Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
+
+ SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ ReplaceUses(N, Result);
+
+ return true;
+}
+
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
-FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
- return new RISCVDAGToDAGISel(TM);
+FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new RISCVDAGToDAGISel(TM, OptLevel);
}