aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
committerDimitry Andric <dim@FreeBSD.org>2022-07-03 14:10:23 +0000
commit145449b1e420787bb99721a429341fa6be3adfb6 (patch)
tree1d56ae694a6de602e348dd80165cf881a36600ed /llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
parentecbca9f5fb7d7613d2b94982c4825eb0d33d6842 (diff)
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp')
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp970
1 files changed, 683 insertions, 287 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 6f77428ae721..cfaafc7b53d2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -37,6 +37,7 @@ namespace RISCV {
#define GET_RISCVVSETable_IMPL
#define GET_RISCVVLXTable_IMPL
#define GET_RISCVVSXTable_IMPL
+#define GET_RISCVMaskedPseudosTable_IMPL
#include "RISCVGenSearchableTables.inc"
} // namespace RISCV
} // namespace llvm
@@ -47,17 +48,36 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
I != E;) {
SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
+ // Convert integer SPLAT_VECTOR to VMV_V_X_VL and floating-point
+ // SPLAT_VECTOR to VFMV_V_F_VL to reduce isel burden.
+ if (N->getOpcode() == ISD::SPLAT_VECTOR) {
+ MVT VT = N->getSimpleValueType(0);
+ unsigned Opc =
+ VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
+ SDLoc DL(N);
+ SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
+ SDValue Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
+ N->getOperand(0), VL);
+
+ --I;
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
+ ++I;
+ CurDAG->DeleteNode(N);
+ continue;
+ }
+
// Lower SPLAT_VECTOR_SPLIT_I64 to two scalar stores and a stride 0 vector
// load. Done after lowering and combining so that we have a chance to
// optimize this to VMV_V_X_VL when the upper bits aren't needed.
if (N->getOpcode() != RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL)
continue;
- assert(N->getNumOperands() == 3 && "Unexpected number of operands");
+ assert(N->getNumOperands() == 4 && "Unexpected number of operands");
MVT VT = N->getSimpleValueType(0);
- SDValue Lo = N->getOperand(0);
- SDValue Hi = N->getOperand(1);
- SDValue VL = N->getOperand(2);
+ SDValue Passthru = N->getOperand(0);
+ SDValue Lo = N->getOperand(1);
+ SDValue Hi = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
"Unexpected VTs!");
@@ -88,7 +108,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
SDValue Ops[] = {Chain,
IntID,
- CurDAG->getUNDEF(VT),
+ Passthru,
StackSlot,
CurDAG->getRegister(RISCV::X0, MVT::i64),
VL};
@@ -112,6 +132,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
}
void RISCVDAGToDAGISel::PostprocessISelDAG() {
+ HandleSDNode Dummy(CurDAG->getRoot());
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
bool MadeChange = false;
@@ -123,57 +144,70 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
MadeChange |= doPeepholeSExtW(N);
MadeChange |= doPeepholeLoadStoreADDI(N);
+ MadeChange |= doPeepholeMaskedRVV(N);
}
+ CurDAG->setRoot(Dummy.getValue());
+
if (MadeChange)
CurDAG->RemoveDeadNodes();
}
-static SDNode *selectImmWithConstantPool(SelectionDAG *CurDAG, const SDLoc &DL,
- const MVT VT, int64_t Imm,
- const RISCVSubtarget &Subtarget) {
- assert(VT == MVT::i64 && "Expecting MVT::i64");
- const RISCVTargetLowering *TLI = Subtarget.getTargetLowering();
- ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(CurDAG->getConstantPool(
- ConstantInt::get(EVT(VT).getTypeForEVT(*CurDAG->getContext()), Imm), VT));
- SDValue Addr = TLI->getAddr(CP, *CurDAG);
- SDValue Offset = CurDAG->getTargetConstant(0, DL, VT);
- // Since there is no data race, the chain can be the entry node.
- SDNode *Load = CurDAG->getMachineNode(RISCV::LD, DL, VT, Addr, Offset,
- CurDAG->getEntryNode());
- MachineFunction &MF = CurDAG->getMachineFunction();
- MachineMemOperand *MemOp = MF.getMachineMemOperand(
- MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
- LLT(VT), CP->getAlign());
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Load), {MemOp});
- return Load;
-}
-
-static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
- int64_t Imm, const RISCVSubtarget &Subtarget) {
- MVT XLenVT = Subtarget.getXLenVT();
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+// Returns true if N is a MachineSDNode that has a reg and simm12 memory
+// operand. The indices of the base pointer and offset are returned in BaseOpIdx
+// and OffsetOpIdx.
+static bool hasMemOffset(SDNode *N, unsigned &BaseOpIdx,
+ unsigned &OffsetOpIdx) {
+ switch (N->getMachineOpcode()) {
+ case RISCV::LB:
+ case RISCV::LH:
+ case RISCV::LW:
+ case RISCV::LBU:
+ case RISCV::LHU:
+ case RISCV::LWU:
+ case RISCV::LD:
+ case RISCV::FLH:
+ case RISCV::FLW:
+ case RISCV::FLD:
+ BaseOpIdx = 0;
+ OffsetOpIdx = 1;
+ return true;
+ case RISCV::SB:
+ case RISCV::SH:
+ case RISCV::SW:
+ case RISCV::SD:
+ case RISCV::FSH:
+ case RISCV::FSW:
+ case RISCV::FSD:
+ BaseOpIdx = 1;
+ OffsetOpIdx = 2;
+ return true;
+ }
- // If Imm is expensive to build, then we put it into constant pool.
- if (Subtarget.useConstantPoolForLargeInts() &&
- Seq.size() > Subtarget.getMaxBuildIntsCost())
- return selectImmWithConstantPool(CurDAG, DL, VT, Imm, Subtarget);
+ return false;
+}
+static SDNode *selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+ RISCVMatInt::InstSeq &Seq) {
SDNode *Result = nullptr;
- SDValue SrcReg = CurDAG->getRegister(RISCV::X0, XLenVT);
+ SDValue SrcReg = CurDAG->getRegister(RISCV::X0, VT);
for (RISCVMatInt::Inst &Inst : Seq) {
- SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, XLenVT);
- if (Inst.Opc == RISCV::LUI)
- Result = CurDAG->getMachineNode(RISCV::LUI, DL, XLenVT, SDImm);
- else if (Inst.Opc == RISCV::ADD_UW)
- Result = CurDAG->getMachineNode(RISCV::ADD_UW, DL, XLenVT, SrcReg,
- CurDAG->getRegister(RISCV::X0, XLenVT));
- else if (Inst.Opc == RISCV::SH1ADD || Inst.Opc == RISCV::SH2ADD ||
- Inst.Opc == RISCV::SH3ADD)
- Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SrcReg);
- else
- Result = CurDAG->getMachineNode(Inst.Opc, DL, XLenVT, SrcReg, SDImm);
+ SDValue SDImm = CurDAG->getTargetConstant(Inst.Imm, DL, VT);
+ switch (Inst.getOpndKind()) {
+ case RISCVMatInt::Imm:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SDImm);
+ break;
+ case RISCVMatInt::RegX0:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg,
+ CurDAG->getRegister(RISCV::X0, VT));
+ break;
+ case RISCVMatInt::RegReg:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SrcReg);
+ break;
+ case RISCVMatInt::RegImm:
+ Result = CurDAG->getMachineNode(Inst.Opc, DL, VT, SrcReg, SDImm);
+ break;
+ }
// Only the first instruction has X0 as its source.
SrcReg = SDValue(Result, 0);
@@ -182,51 +216,28 @@ static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
return Result;
}
-static SDValue createTupleImpl(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned RegClassID, unsigned SubReg0) {
- assert(Regs.size() >= 2 && Regs.size() <= 8);
-
- SDLoc DL(Regs[0]);
- SmallVector<SDValue, 8> Ops;
-
- Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
+static SDNode *selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
+ int64_t Imm, const RISCVSubtarget &Subtarget) {
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
- for (unsigned I = 0; I < Regs.size(); ++I) {
- Ops.push_back(Regs[I]);
- Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
- }
- SDNode *N =
- CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
- return SDValue(N, 0);
+ return selectImmSeq(CurDAG, DL, VT, Seq);
}
-static SDValue createM1Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- static const unsigned RegClassIDs[] = {
+static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
+ unsigned NF, RISCVII::VLMUL LMUL) {
+ static const unsigned M1TupleRegClassIDs[] = {
RISCV::VRN2M1RegClassID, RISCV::VRN3M1RegClassID, RISCV::VRN4M1RegClassID,
RISCV::VRN5M1RegClassID, RISCV::VRN6M1RegClassID, RISCV::VRN7M1RegClassID,
RISCV::VRN8M1RegClassID};
+ static const unsigned M2TupleRegClassIDs[] = {RISCV::VRN2M2RegClassID,
+ RISCV::VRN3M2RegClassID,
+ RISCV::VRN4M2RegClassID};
- return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm1_0);
-}
-
-static SDValue createM2Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- static const unsigned RegClassIDs[] = {RISCV::VRN2M2RegClassID,
- RISCV::VRN3M2RegClassID,
- RISCV::VRN4M2RegClassID};
-
- return createTupleImpl(CurDAG, Regs, RegClassIDs[NF - 2], RISCV::sub_vrm2_0);
-}
-
-static SDValue createM4Tuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF) {
- return createTupleImpl(CurDAG, Regs, RISCV::VRN2M4RegClassID,
- RISCV::sub_vrm4_0);
-}
+ assert(Regs.size() >= 2 && Regs.size() <= 8);
-static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
- unsigned NF, RISCVII::VLMUL LMUL) {
+ unsigned RegClassID;
+ unsigned SubReg0;
switch (LMUL) {
default:
llvm_unreachable("Invalid LMUL.");
@@ -234,12 +245,37 @@ static SDValue createTuple(SelectionDAG &CurDAG, ArrayRef<SDValue> Regs,
case RISCVII::VLMUL::LMUL_F4:
case RISCVII::VLMUL::LMUL_F2:
case RISCVII::VLMUL::LMUL_1:
- return createM1Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm1_0;
+ RegClassID = M1TupleRegClassIDs[NF - 2];
+ break;
case RISCVII::VLMUL::LMUL_2:
- return createM2Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm2_0;
+ RegClassID = M2TupleRegClassIDs[NF - 2];
+ break;
case RISCVII::VLMUL::LMUL_4:
- return createM4Tuple(CurDAG, Regs, NF);
+ static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
+ "Unexpected subreg numbering");
+ SubReg0 = RISCV::sub_vrm4_0;
+ RegClassID = RISCV::VRN2M4RegClassID;
+ break;
+ }
+
+ SDLoc DL(Regs[0]);
+ SmallVector<SDValue, 8> Ops;
+
+ Ops.push_back(CurDAG.getTargetConstant(RegClassID, DL, MVT::i32));
+
+ for (unsigned I = 0; I < Regs.size(); ++I) {
+ Ops.push_back(Regs[I]);
+ Ops.push_back(CurDAG.getTargetConstant(SubReg0 + I, DL, MVT::i32));
}
+ SDNode *N =
+ CurDAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
+ return SDValue(N, 0);
}
void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
@@ -287,6 +323,10 @@ void RISCVDAGToDAGISel::addVectorLoadStoreOperands(
Operands.push_back(Glue);
}
+static bool isAllUndef(ArrayRef<SDValue> Values) {
+ return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); });
+}
+
void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
bool IsStrided) {
SDLoc DL(Node);
@@ -297,19 +337,21 @@ void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked,
unsigned CurOp = 2;
SmallVector<SDValue, 8> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
- SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
- Operands.push_back(MaskedOff);
- CurOp += NF;
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
+ SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL);
+ Operands.push_back(Merge);
}
+ CurOp += NF;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
Operands, /*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
- RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW,
+ RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
static_cast<unsigned>(LMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
@@ -338,25 +380,25 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
unsigned CurOp = 2;
SmallVector<SDValue, 7> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
Operands.push_back(MaskedOff);
- CurOp += NF;
}
+ CurOp += NF;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
/*IsStridedOrIndexed*/ false, Operands,
/*IsLoad=*/true);
const RISCV::VLSEGPseudo *P =
- RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true,
+ RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
Log2SEW, static_cast<unsigned>(LMUL));
MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped,
- MVT::Other, MVT::Glue, Operands);
- SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
- /*Glue*/ SDValue(Load, 2));
+ XLenVT, MVT::Other, Operands);
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
@@ -368,8 +410,8 @@ void RISCVDAGToDAGISel::selectVLSEGFF(SDNode *Node, bool IsMasked) {
CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SuperReg));
}
- ReplaceUses(SDValue(Node, NF), SDValue(ReadVL, 0)); // VL
- ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 1)); // Chain
+ ReplaceUses(SDValue(Node, NF), SDValue(Load, 1)); // VL
+ ReplaceUses(SDValue(Node, NF + 1), SDValue(Load, 2)); // Chain
CurDAG->RemoveDeadNode(Node);
}
@@ -383,13 +425,15 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
unsigned CurOp = 2;
SmallVector<SDValue, 8> Operands;
- if (IsMasked) {
- SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
- Node->op_begin() + CurOp + NF);
+
+ SmallVector<SDValue, 8> Regs(Node->op_begin() + CurOp,
+ Node->op_begin() + CurOp + NF);
+ bool IsTU = IsMasked || !isAllUndef(Regs);
+ if (IsTU) {
SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL);
Operands.push_back(MaskedOff);
- CurOp += NF;
}
+ CurOp += NF;
MVT IndexVT;
addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
@@ -406,7 +450,7 @@ void RISCVDAGToDAGISel::selectVLXSEG(SDNode *Node, bool IsMasked,
"values when XLEN=32");
}
const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo(
- NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+ NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
static_cast<unsigned>(IndexLMUL));
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands);
@@ -596,32 +640,125 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
int64_t Imm = ConstNode->getSExtValue();
// If the upper XLen-16 bits are not used, try to convert this to a simm12
// by sign extending bit 15.
- if (isUInt<16>(Imm) && isInt<12>(SignExtend64(Imm, 16)) &&
+ if (isUInt<16>(Imm) && isInt<12>(SignExtend64<16>(Imm)) &&
hasAllHUsers(Node))
- Imm = SignExtend64(Imm, 16);
+ Imm = SignExtend64<16>(Imm);
// If the upper 32-bits are not used try to convert this into a simm32 by
// sign extending bit 32.
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
- Imm = SignExtend64(Imm, 32);
+ Imm = SignExtend64<32>(Imm);
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget));
return;
}
- case ISD::FrameIndex: {
- SDValue Imm = CurDAG->getTargetConstant(0, DL, XLenVT);
- int FI = cast<FrameIndexSDNode>(Node)->getIndex();
- SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
- ReplaceNode(Node, CurDAG->getMachineNode(RISCV::ADDI, DL, VT, TFI, Imm));
+ case ISD::ADD: {
+ // Try to select ADD + immediate used as memory addresses to
+ // (ADDI (ADD X, Imm-Lo12), Lo12) if it will allow the ADDI to be removed by
+ // doPeepholeLoadStoreADDI.
+
+ // LHS should be an immediate.
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+
+ int64_t Offset = N1C->getSExtValue();
+ int64_t Lo12 = SignExtend64<12>(Offset);
+
+ // Don't do this if the lower 12 bits are 0 or we could use ADDI directly.
+ if (Lo12 == 0 || isInt<12>(Offset))
+ break;
+
+ // Don't do this if we can use a pair of ADDIs.
+ if (isInt<12>(Offset / 2) && isInt<12>(Offset - Offset / 2))
+ break;
+
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Offset, Subtarget->getFeatureBits());
+
+ Offset -= Lo12;
+ // Restore sign bits for RV32.
+ if (!Subtarget->is64Bit())
+ Offset = SignExtend64<32>(Offset);
+
+ // We can fold if the last operation is an ADDI or its an ADDIW that could
+ // be treated as an ADDI.
+ if (Seq.back().Opc != RISCV::ADDI &&
+ !(Seq.back().Opc == RISCV::ADDIW && isInt<32>(Offset)))
+ break;
+ assert(Seq.back().Imm == Lo12 && "Expected immediate to match Lo12");
+ // Drop the last operation.
+ Seq.pop_back();
+ assert(!Seq.empty() && "Expected more instructions in sequence");
+
+ bool AllPointerUses = true;
+ for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
+ SDNode *User = *UI;
+
+ // Is this user a memory instruction that uses a register and immediate
+ // that has this ADD as its pointer.
+ unsigned BaseOpIdx, OffsetOpIdx;
+ if (!User->isMachineOpcode() ||
+ !hasMemOffset(User, BaseOpIdx, OffsetOpIdx) ||
+ UI.getOperandNo() != BaseOpIdx) {
+ AllPointerUses = false;
+ break;
+ }
+
+ // If the memory instruction already has an offset, make sure the combined
+ // offset is foldable.
+ int64_t MemOffs =
+ cast<ConstantSDNode>(User->getOperand(OffsetOpIdx))->getSExtValue();
+ MemOffs += Lo12;
+ if (!isInt<12>(MemOffs)) {
+ AllPointerUses = false;
+ break;
+ }
+ }
+
+ if (!AllPointerUses)
+ break;
+
+ // Emit (ADDI (ADD X, Hi), Lo)
+ SDNode *Imm = selectImmSeq(CurDAG, DL, VT, Seq);
+ SDNode *ADD = CurDAG->getMachineNode(RISCV::ADD, DL, VT,
+ Node->getOperand(0), SDValue(Imm, 0));
+ SDNode *ADDI =
+ CurDAG->getMachineNode(RISCV::ADDI, DL, VT, SDValue(ADD, 0),
+ CurDAG->getTargetConstant(Lo12, DL, VT));
+ ReplaceNode(Node, ADDI);
return;
}
+ case ISD::SHL: {
+ auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
+ if (!N1C)
+ break;
+ SDValue N0 = Node->getOperand(0);
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse() ||
+ !isa<ConstantSDNode>(N0.getOperand(1)))
+ break;
+ unsigned ShAmt = N1C->getZExtValue();
+ uint64_t Mask = N0.getConstantOperandVal(1);
+
+ // Optimize (shl (and X, C2), C) -> (slli (srliw X, C3), C3+C) where C2 has
+ // 32 leading zeros and C3 trailing zeros.
+ if (ShAmt <= 32 && isShiftedMask_64(Mask)) {
+ unsigned XLen = Subtarget->getXLen();
+ unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ if (TrailingZeros > 0 && LeadingZeros == 32) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(TrailingZeros + ShAmt, DL, VT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+ break;
+ }
case ISD::SRL: {
- // Optimize (srl (and X, C2), C) ->
- // (srli (slli X, (XLen-C3), (XLen-C3) + C)
- // Where C2 is a mask with C3 trailing ones.
- // Taking into account that the C2 may have had lower bits unset by
- // SimplifyDemandedBits. This avoids materializing the C2 immediate.
- // This pattern occurs when type legalizing right shifts for types with
- // less than XLen bits.
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
if (!N1C)
break;
@@ -631,6 +768,32 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
unsigned ShAmt = N1C->getZExtValue();
uint64_t Mask = N0.getConstantOperandVal(1);
+
+ // Optimize (srl (and X, C2), C) -> (slli (srliw X, C3), C3-C) where C2 has
+ // 32 leading zeros and C3 trailing zeros.
+ if (isShiftedMask_64(Mask)) {
+ unsigned XLen = Subtarget->getXLen();
+ unsigned LeadingZeros = XLen - (64 - countLeadingZeros(Mask));
+ unsigned TrailingZeros = countTrailingZeros(Mask);
+ if (LeadingZeros == 32 && TrailingZeros > ShAmt) {
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, N0->getOperand(0),
+ CurDAG->getTargetConstant(TrailingZeros, DL, VT));
+ SDNode *SLLI = CurDAG->getMachineNode(
+ RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(TrailingZeros - ShAmt, DL, VT));
+ ReplaceNode(Node, SLLI);
+ return;
+ }
+ }
+
+ // Optimize (srl (and X, C2), C) ->
+ // (srli (slli X, (XLen-C3), (XLen-C3) + C)
+ // Where C2 is a mask with C3 trailing ones.
+ // Taking into account that the C2 may have had lower bits unset by
+ // SimplifyDemandedBits. This avoids materializing the C2 immediate.
+ // This pattern occurs when type legalizing right shifts for types with
+ // less than XLen bits.
Mask |= maskTrailingOnes<uint64_t>(ShAmt);
if (!isMask_64(Mask))
break;
@@ -700,13 +863,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
uint64_t C1 = N1C->getZExtValue();
- // Keep track of whether this is a andi, zext.h, or zext.w.
- bool ZExtOrANDI = isInt<12>(N1C->getSExtValue());
- if (C1 == UINT64_C(0xFFFF) &&
- (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()))
- ZExtOrANDI = true;
- if (C1 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())
- ZExtOrANDI = true;
+ // Keep track of whether this is a c.andi. If we can't use c.andi, the
+ // shift pair might offer more compression opportunities.
+ // TODO: We could check for C extension here, but we don't have many lit
+ // tests with the C extension enabled so not checking gets better coverage.
+ // TODO: What if ANDI faster than shift?
+ bool IsCANDI = isInt<6>(N1C->getSExtValue());
// Clear irrelevant bits in the mask.
if (LeftShift)
@@ -727,9 +889,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (C2 < C3) {
// If the number of leading zeros is C2+32 this can be SRLIW.
if (C2 + 32 == C3) {
- SDNode *SRLIW =
- CurDAG->getMachineNode(RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2, DL, XLenVT));
+ SDNode *SRLIW = CurDAG->getMachineNode(
+ RISCV::SRLIW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SRLIW);
return;
}
@@ -739,27 +900,33 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
//
// This pattern occurs when (i32 (srl (sra 31), c3 - 32)) is type
// legalized and goes through DAG combine.
- SDValue Y;
if (C2 >= 32 && (C3 - C2) == 1 && N0.hasOneUse() &&
- selectSExti32(X, Y)) {
+ X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32) {
SDNode *SRAIW =
- CurDAG->getMachineNode(RISCV::SRAIW, DL, XLenVT, Y,
- CurDAG->getTargetConstant(31, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRAIW, DL, VT, X.getOperand(0),
+ CurDAG->getTargetConstant(31, DL, VT));
SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, SDValue(SRAIW, 0),
- CurDAG->getTargetConstant(C3 - 32, DL, XLenVT));
+ RISCV::SRLIW, DL, VT, SDValue(SRAIW, 0),
+ CurDAG->getTargetConstant(C3 - 32, DL, VT));
ReplaceNode(Node, SRLIW);
return;
}
// (srli (slli x, c3-c2), c3).
- if (OneUseOrZExtW && !ZExtOrANDI) {
+ // Skip if we could use (zext.w (sraiw X, C2)).
+ bool Skip = Subtarget->hasStdExtZba() && C3 == 32 &&
+ X.getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(X.getOperand(1))->getVT() == MVT::i32;
+ // Also Skip if we can use bexti.
+ Skip |= Subtarget->hasStdExtZbs() && C3 == XLen - 1;
+ if (OneUseOrZExtW && !Skip) {
SDNode *SLLI = CurDAG->getMachineNode(
- RISCV::SLLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ RISCV::SLLI, DL, VT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -775,21 +942,20 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
C1 == (maskTrailingOnes<uint64_t>(XLen - (C2 + C3)) << C2)) {
// Use slli.uw when possible.
if ((XLen - (C2 + C3)) == 32 && Subtarget->hasStdExtZba()) {
- SDNode *SLLI_UW =
- CurDAG->getMachineNode(RISCV::SLLI_UW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2, DL, XLenVT));
+ SDNode *SLLI_UW = CurDAG->getMachineNode(
+ RISCV::SLLI_UW, DL, VT, X, CurDAG->getTargetConstant(C2, DL, VT));
ReplaceNode(Node, SLLI_UW);
return;
}
// (srli (slli c2+c3), c3)
- if (OneUseOrZExtW && !ZExtOrANDI) {
+ if (OneUseOrZExtW && !IsCANDI) {
SDNode *SLLI = CurDAG->getMachineNode(
- RISCV::SLLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ RISCV::SLLI, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SRLI =
- CurDAG->getMachineNode(RISCV::SRLI, DL, XLenVT, SDValue(SLLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SRLI, DL, VT, SDValue(SLLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SRLI);
return;
}
@@ -801,25 +967,31 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!LeftShift && isShiftedMask_64(C1)) {
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
uint64_t C3 = countTrailingZeros(C1);
- if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !ZExtOrANDI) {
+ if (Leading == C2 && C2 + C3 < XLen && OneUseOrZExtW && !IsCANDI) {
+ unsigned SrliOpc = RISCV::SRLI;
+ // If the input is zexti32 we should use SRLIW.
+ if (X.getOpcode() == ISD::AND && isa<ConstantSDNode>(X.getOperand(1)) &&
+ X.getConstantOperandVal(1) == UINT64_C(0xFFFFFFFF)) {
+ SrliOpc = RISCV::SRLIW;
+ X = X.getOperand(0);
+ }
SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ SrliOpc, DL, VT, X, CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If the leading zero count is C2+32, we can use SRLIW instead of SRLI.
if (Leading > 32 && (Leading - 32) == C2 && C2 + C3 < 32 &&
- OneUseOrZExtW && !ZExtOrANDI) {
- SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C2 + C3, DL, XLenVT));
+ OneUseOrZExtW && !IsCANDI) {
+ SDNode *SRLIW =
+ CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(C2 + C3, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
@@ -830,24 +1002,23 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (LeftShift && isShiftedMask_64(C1)) {
uint64_t Leading = XLen - (64 - countLeadingZeros(C1));
uint64_t C3 = countTrailingZeros(C1);
- if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !ZExtOrANDI) {
+ if (Leading == 0 && C2 < C3 && OneUseOrZExtW && !IsCANDI) {
SDNode *SRLI = CurDAG->getMachineNode(
- RISCV::SRLI, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ RISCV::SRLI, DL, VT, X, CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLI, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLI, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
// If we have (32-C2) leading zeros, we can use SRLIW instead of SRLI.
- if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !ZExtOrANDI) {
- SDNode *SRLIW = CurDAG->getMachineNode(
- RISCV::SRLIW, DL, XLenVT, X,
- CurDAG->getTargetConstant(C3 - C2, DL, XLenVT));
+ if (C2 < C3 && Leading + C2 == 32 && OneUseOrZExtW && !IsCANDI) {
+ SDNode *SRLIW =
+ CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, X,
+ CurDAG->getTargetConstant(C3 - C2, DL, VT));
SDNode *SLLI =
- CurDAG->getMachineNode(RISCV::SLLI, DL, XLenVT, SDValue(SRLIW, 0),
- CurDAG->getTargetConstant(C3, DL, XLenVT));
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, SDValue(SRLIW, 0),
+ CurDAG->getTargetConstant(C3, DL, VT));
ReplaceNode(Node, SLLI);
return;
}
@@ -908,7 +1079,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
uint64_t ShiftedC1 = C1 << ConstantShift;
// If this RV32, we need to sign extend the constant.
if (XLen == 32)
- ShiftedC1 = SignExtend64(ShiftedC1, 32);
+ ShiftedC1 = SignExtend64<32>(ShiftedC1);
// Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))).
SDNode *Imm = selectImm(CurDAG, DL, VT, ShiftedC1, *Subtarget);
@@ -1005,45 +1176,44 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
MVT Src1VT = Src1.getSimpleValueType();
unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode,
- VMSetOpcode, VMANDOpcode;
+ VMOROpcode;
switch (RISCVTargetLowering::getLMUL(Src1VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
-#define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \
+#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \
case RISCVII::VLMUL::lmulenum: \
VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \
: RISCV::PseudoVMSLT_VX_##suffix; \
VMSLTMaskOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix##_MASK \
: RISCV::PseudoVMSLT_VX_##suffix##_MASK; \
- VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \
break;
- CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1)
- CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2)
- CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4)
- CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8)
- CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16)
- CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32)
- CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64)
-#undef CASE_VMSLT_VMSET_OPCODES
+ CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1)
+ CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2)
+ CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4)
+ CASE_VMSLT_OPCODES(LMUL_1, M1, B8)
+ CASE_VMSLT_OPCODES(LMUL_2, M2, B16)
+ CASE_VMSLT_OPCODES(LMUL_4, M4, B32)
+ CASE_VMSLT_OPCODES(LMUL_8, M8, B64)
+#undef CASE_VMSLT_OPCODES
}
// Mask operations use the LMUL from the mask type.
switch (RISCVTargetLowering::getLMUL(VT)) {
default:
llvm_unreachable("Unexpected LMUL!");
-#define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \
+#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \
case RISCVII::VLMUL::lmulenum: \
VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \
VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \
- VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \
+ VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \
break;
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4)
- CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8)
-#undef CASE_VMXOR_VMANDN_VMAND_OPCODES
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4)
+ CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8)
+#undef CASE_VMXOR_VMANDN_VMOR_OPCODES
}
SDValue SEW = CurDAG->getTargetConstant(
Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT);
@@ -1053,12 +1223,17 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDValue MaskedOff = Node->getOperand(1);
SDValue Mask = Node->getOperand(4);
- // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}.
+ // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff.
if (IsCmpUnsignedZero) {
- SDValue VMSet =
- SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0);
- ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT,
- {Mask, VMSet, VL, MaskSEW}));
+ // We don't need vmor if the MaskedOff and the Mask are the same
+ // value.
+ if (Mask == MaskedOff) {
+ ReplaceUses(Node, Mask.getNode());
+ return;
+ }
+ ReplaceNode(Node,
+ CurDAG->getMachineNode(VMOROpcode, DL, VT,
+ {Mask, MaskedOff, VL, MaskSEW}));
return;
}
@@ -1082,10 +1257,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Otherwise use
// vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0
+ // The result is mask undisturbed.
+ // We use the same instructions to emulate mask agnostic behavior, because
+ // the agnostic result can be either undisturbed or all 1.
SDValue Cmp = SDValue(
CurDAG->getMachineNode(VMSLTMaskOpcode, DL, VT,
{MaskedOff, Src1, Src2, V0, VL, SEW, Glue}),
0);
+ // vmxor.mm vd, vd, v0 is used to update active value.
ReplaceNode(Node, CurDAG->getMachineNode(VMXOROpcode, DL, VT,
{Cmp, Mask, VL, MaskSEW}));
return;
@@ -1215,7 +1394,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned CurOp = 2;
// Masked intrinsic only have TU version pseduo instructions.
- bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
+ bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
SmallVector<SDValue, 8> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1267,9 +1446,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// The riscv_vlm intrinsic are always tail agnostic and no passthru operand.
bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
// Masked intrinsic only have TU version pseduo instructions.
- bool IsTU =
- HasPassthruOperand &&
- ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
+ bool IsTU = HasPassthruOperand &&
+ (IsMasked || !Node->getOperand(CurOp).isUndef());
SmallVector<SDValue, 8> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1302,7 +1480,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned CurOp = 2;
// Masked intrinsic only have TU version pseduo instructions.
- bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
+ bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef();
SmallVector<SDValue, 7> Operands;
if (IsTU)
Operands.push_back(Node->getOperand(CurOp++));
@@ -1318,19 +1496,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
const RISCV::VLEPseudo *P =
RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
Log2SEW, static_cast<unsigned>(LMUL));
- MachineSDNode *Load =
- CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
- MVT::Other, MVT::Glue, Operands);
- SDNode *ReadVL = CurDAG->getMachineNode(RISCV::PseudoReadVL, DL, XLenVT,
- /*Glue*/ SDValue(Load, 2));
-
+ MachineSDNode *Load = CurDAG->getMachineNode(
+ P->Pseudo, DL, Node->getVTList(), Operands);
if (auto *MemOp = dyn_cast<MemSDNode>(Node))
CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
- ReplaceUses(SDValue(Node, 0), SDValue(Load, 0));
- ReplaceUses(SDValue(Node, 1), SDValue(ReadVL, 0)); // VL
- ReplaceUses(SDValue(Node, 2), SDValue(Load, 1)); // Chain
- CurDAG->RemoveDeadNode(Node);
+ ReplaceNode(Node, Load);
return;
}
}
@@ -1610,9 +1781,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// Try to match splat of a scalar load to a strided load with stride of x0.
bool IsScalarMove = Node->getOpcode() == RISCVISD::VMV_S_X_VL ||
Node->getOpcode() == RISCVISD::VFMV_S_F_VL;
- if (IsScalarMove && !Node->getOperand(0).isUndef())
+ bool HasPassthruOperand = Node->getOpcode() != ISD::SPLAT_VECTOR;
+ if (HasPassthruOperand && !Node->getOperand(0).isUndef())
break;
- SDValue Src = IsScalarMove ? Node->getOperand(1) : Node->getOperand(0);
+ SDValue Src = HasPassthruOperand ? Node->getOperand(1) : Node->getOperand(0);
auto *Ld = dyn_cast<LoadSDNode>(Src);
if (!Ld)
break;
@@ -1634,7 +1806,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
break;
selectVLOp(Node->getOperand(2), VL);
} else
- selectVLOp(Node->getOperand(1), VL);
+ selectVLOp(Node->getOperand(2), VL);
unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT);
@@ -1650,8 +1822,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MachineSDNode *Load =
CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
- if (auto *MemOp = dyn_cast<MemSDNode>(Node))
- CurDAG->setNodeMemRefs(Load, {MemOp->getMemOperand()});
+ CurDAG->setNodeMemRefs(Load, {Ld->getMemOperand()});
ReplaceNode(Node, Load);
return;
@@ -1680,11 +1851,37 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
return true;
}
-bool RISCVDAGToDAGISel::SelectAddrFI(SDValue Addr, SDValue &Base) {
+bool RISCVDAGToDAGISel::SelectAddrFrameIndex(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), Subtarget->getXLenVT());
+ Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), Subtarget->getXLenVT());
return true;
}
+
+ return false;
+}
+
+// Select a frame index and an optional immediate offset from an ADD or OR.
+bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (!CurDAG->isBaseWithConstantOffset(Addr))
+ return false;
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Addr.getOperand(0))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(),
+ Subtarget->getXLenVT());
+ Offset = CurDAG->getTargetConstant(CVal, SDLoc(Addr),
+ Subtarget->getXLenVT());
+ return true;
+ }
+ }
+
return false;
}
@@ -1698,6 +1895,76 @@ bool RISCVDAGToDAGISel::SelectBaseAddr(SDValue Addr, SDValue &Base) {
return true;
}
+bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ SDLoc DL(Addr);
+ MVT VT = Addr.getSimpleValueType();
+
+ if (Addr.getOpcode() == RISCVISD::ADD_LO) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = Addr.getOperand(0);
+ if (Base.getOpcode() == RISCVISD::ADD_LO) {
+ SDValue LoOperand = Base.getOperand(1);
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(LoOperand)) {
+ // If the Lo in (ADD_LO hi, lo) is a global variable's address
+ // (its low part, really), then we can rely on the alignment of that
+ // variable to provide a margin of safety before low part can overflow
+ // the 12 bits of the load/store offset. Check if CVal falls within
+ // that margin; if so (low part + CVal) can't overflow.
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = commonAlignment(
+ GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+ if (CVal == 0 || Alignment > CVal) {
+ int64_t CombinedOffset = CVal + GA->getOffset();
+ Base = Base.getOperand(0);
+ Offset = CurDAG->getTargetGlobalAddress(
+ GA->getGlobal(), SDLoc(LoOperand), LoOperand.getValueType(),
+ CombinedOffset, GA->getTargetFlags());
+ return true;
+ }
+ }
+ }
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+ Offset = CurDAG->getTargetConstant(CVal, DL, VT);
+ return true;
+ }
+ }
+
+ // Handle ADD with large immediates.
+ if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ assert(!isInt<12>(CVal) && "simm12 not already handled?");
+
+ if (isInt<12>(CVal / 2) && isInt<12>(CVal - CVal / 2)) {
+ // We can use an ADDI for part of the offset and fold the rest into the
+ // load/store. This mirrors the AddiPair PatFrag in RISCVInstrInfo.td.
+ int64_t Adj = CVal < 0 ? -2048 : 2047;
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::ADDI, DL, VT, Addr.getOperand(0),
+ CurDAG->getTargetConstant(Adj, DL, VT)),
+ 0);
+ Offset = CurDAG->getTargetConstant(CVal - Adj, DL, VT);
+ return true;
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+}
+
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
// Shift instructions on RISCV only read the lower 5 or 6 bits of the shift
@@ -1723,6 +1990,21 @@ bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
ShAmt = N.getOperand(0);
return true;
}
+ } else if (N.getOpcode() == ISD::SUB &&
+ isa<ConstantSDNode>(N.getOperand(0))) {
+ uint64_t Imm = N.getConstantOperandVal(0);
+ // If we are shifting by N-X where N == 0 (mod ShiftWidth), then just shift
+ // by -X to generate a NEG instead of a SUB of a constant.
+ if (Imm != 0 && Imm % ShiftWidth == 0) {
+ SDLoc DL(N);
+ EVT VT = N.getValueType();
+ SDValue Zero = CurDAG->getRegister(RISCV::X0, VT);
+ unsigned NegOpc = VT == MVT::i64 ? RISCV::SUBW : RISCV::SUB;
+ MachineSDNode *Neg = CurDAG->getMachineNode(NegOpc, DL, VT, Zero,
+ N.getOperand(1));
+ ShAmt = SDValue(Neg, 0);
+ return true;
+ }
}
ShAmt = N;
@@ -1778,6 +2060,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
Node->getOpcode() == ISD::MUL || Node->getOpcode() == ISD::SHL ||
Node->getOpcode() == ISD::SRL ||
Node->getOpcode() == ISD::SIGN_EXTEND_INREG ||
+ Node->getOpcode() == RISCVISD::GREV ||
+ Node->getOpcode() == RISCVISD::GORC ||
isa<ConstantSDNode>(Node)) &&
"Unexpected opcode");
@@ -1812,6 +2096,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
case RISCV::CTZW:
case RISCV::CPOPW:
case RISCV::SLLI_UW:
+ case RISCV::FMV_W_X:
case RISCV::FCVT_H_W:
case RISCV::FCVT_H_WU:
case RISCV::FCVT_S_W:
@@ -1835,6 +2120,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
return false;
break;
case RISCV::SEXT_H:
+ case RISCV::FMV_H_X:
case RISCV::ZEXT_H_RV32:
case RISCV::ZEXT_H_RV64:
if (Bits < 16)
@@ -1871,22 +2157,32 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits) const {
// allows us to choose betwen VSETIVLI or VSETVLI later.
bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
auto *C = dyn_cast<ConstantSDNode>(N);
- if (C && (isUInt<5>(C->getZExtValue()) ||
- C->getSExtValue() == RISCV::VLMaxSentinel))
+ if (C && isUInt<5>(C->getZExtValue())) {
VL = CurDAG->getTargetConstant(C->getZExtValue(), SDLoc(N),
N->getValueType(0));
- else
+ } else if (C && C->isAllOnesValue()) {
+ // Treat all ones as VLMax.
+ VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
+ N->getValueType(0));
+ } else if (isa<RegisterSDNode>(N) &&
+ cast<RegisterSDNode>(N)->getReg() == RISCV::X0) {
+ // All our VL operands use an operand that allows GPRNoX0 or an immediate
+ // as the register class. Convert X0 to a special immediate to pass the
+ // MachineVerifier. This is recognized specially by the vsetvli insertion
+ // pass.
+ VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, SDLoc(N),
+ N->getValueType(0));
+ } else {
VL = N;
+ }
return true;
}
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
- if (N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL)
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
return false;
- SplatVal = N.getOperand(0);
+ SplatVal = N.getOperand(1);
return true;
}
@@ -1896,23 +2192,22 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
SelectionDAG &DAG,
const RISCVSubtarget &Subtarget,
ValidateFn ValidateImm) {
- if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
- !isa<ConstantSDNode>(N.getOperand(0)))
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
+ !isa<ConstantSDNode>(N.getOperand(1)))
return false;
- int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+ int64_t SplatImm =
+ cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
- // ISD::SPLAT_VECTOR, RISCVISD::SPLAT_VECTOR_I64 and RISCVISD::VMV_V_X_VL
- // share semantics when the operand type is wider than the resulting vector
- // element type: an implicit truncation first takes place. Therefore, perform
- // a manual truncation/sign-extension in order to ignore any truncated bits
- // and catch any zero-extended immediate.
+ // The semantics of RISCVISD::VMV_V_X_VL is that when the operand
+ // type is wider than the resulting vector element type, an implicit
+ // truncation first takes place. Therefore, perform a manual
+ // truncation/sign-extension in order to ignore any truncated bits and catch
+ // any zero-extended immediate.
// For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
// sign-extending to (XLenVT -1).
MVT XLenVT = Subtarget.getXLenVT();
- assert(XLenVT == N.getOperand(0).getSimpleValueType() &&
+ assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
"Unexpected splat operand type");
MVT EltVT = N.getSimpleValueType().getVectorElementType();
if (EltVT.bitsLT(XLenVT))
@@ -1945,13 +2240,12 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
}
bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
- if ((N.getOpcode() != ISD::SPLAT_VECTOR &&
- N.getOpcode() != RISCVISD::SPLAT_VECTOR_I64 &&
- N.getOpcode() != RISCVISD::VMV_V_X_VL) ||
- !isa<ConstantSDNode>(N.getOperand(0)))
+ if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
+ !isa<ConstantSDNode>(N.getOperand(1)))
return false;
- int64_t SplatImm = cast<ConstantSDNode>(N.getOperand(0))->getSExtValue();
+ int64_t SplatImm =
+ cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
if (!isUInt<5>(SplatImm))
return false;
@@ -1980,49 +2274,42 @@ bool RISCVDAGToDAGISel::selectRVVSimm5(SDValue N, unsigned Width,
// Merge an ADDI into the offset of a load/store instruction where possible.
// (load (addi base, off1), off2) -> (load base, off1+off2)
// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
+// (load (add base, (addi src, off1)), off2)
+// -> (load (add base, src), off1+off2)
+// (store val, (add base, (addi src, off1)), off2)
+// -> (store val, (add base, src), off1+off2)
// This is possible when off1+off2 fits a 12-bit immediate.
bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
- int OffsetOpIdx;
- int BaseOpIdx;
-
- // Only attempt this optimisation for I-type loads and S-type stores.
- switch (N->getMachineOpcode()) {
- default:
+ unsigned OffsetOpIdx, BaseOpIdx;
+ if (!hasMemOffset(N, BaseOpIdx, OffsetOpIdx))
return false;
- case RISCV::LB:
- case RISCV::LH:
- case RISCV::LW:
- case RISCV::LBU:
- case RISCV::LHU:
- case RISCV::LWU:
- case RISCV::LD:
- case RISCV::FLH:
- case RISCV::FLW:
- case RISCV::FLD:
- BaseOpIdx = 0;
- OffsetOpIdx = 1;
- break;
- case RISCV::SB:
- case RISCV::SH:
- case RISCV::SW:
- case RISCV::SD:
- case RISCV::FSH:
- case RISCV::FSW:
- case RISCV::FSD:
- BaseOpIdx = 1;
- OffsetOpIdx = 2;
- break;
- }
if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
return false;
SDValue Base = N->getOperand(BaseOpIdx);
- // If the base is an ADDI, we can merge it in to the load/store.
- if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI)
+ if (!Base.isMachineOpcode())
return false;
+ if (Base.getMachineOpcode() == RISCV::ADDI) {
+ // If the base is an ADDI, we can merge it in to the load/store.
+ } else if (Base.getMachineOpcode() == RISCV::ADDIW &&
+ isa<ConstantSDNode>(Base.getOperand(1)) &&
+ Base.getOperand(0).isMachineOpcode() &&
+ Base.getOperand(0).getMachineOpcode() == RISCV::LUI &&
+ isa<ConstantSDNode>(Base.getOperand(0).getOperand(0))) {
+ // ADDIW can be merged if it's part of LUI+ADDIW constant materialization
+ // and LUI+ADDI would have produced the same result. This is true for all
+ // simm32 values except 0x7ffff800-0x7fffffff.
+ int64_t Offset =
+ SignExtend64<32>(Base.getOperand(0).getConstantOperandVal(0) << 12);
+ Offset += cast<ConstantSDNode>(Base.getOperand(1))->getSExtValue();
+ if (!isInt<32>(Offset))
+ return false;
+ } else
+ return false;
+
SDValue ImmOperand = Base.getOperand(1);
uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
@@ -2039,7 +2326,8 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
// to provide a margin of safety before off1 can overflow the 12 bits.
// Check if off2 falls within that margin; if so off1+off2 can't overflow.
const DataLayout &DL = CurDAG->getDataLayout();
- Align Alignment = GA->getGlobal()->getPointerAlignment(DL);
+ Align Alignment = commonAlignment(GA->getGlobal()->getPointerAlignment(DL),
+ GA->getOffset());
if (Offset2 != 0 && Alignment <= Offset2)
return false;
int64_t Offset1 = GA->getOffset();
@@ -2049,7 +2337,7 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
CombinedOffset, GA->getTargetFlags());
} else if (auto *CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
// Ditto.
- Align Alignment = CP->getAlign();
+ Align Alignment = commonAlignment(CP->getAlign(), CP->getOffset());
if (Offset2 != 0 && Alignment <= Offset2)
return false;
int64_t Offset1 = CP->getOffset();
@@ -2068,12 +2356,13 @@ bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) {
LLVM_DEBUG(dbgs() << "\n");
// Modify the offset operand of the load/store.
- if (BaseOpIdx == 0) // Load
- CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
- N->getOperand(2));
- else // Store
- CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
- ImmOperand, N->getOperand(3));
+ if (BaseOpIdx == 0) { // Load
+ N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand,
+ N->getOperand(2));
+ } else { // Store
+ N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0),
+ ImmOperand, N->getOperand(3));
+ }
return true;
}
@@ -2130,6 +2419,8 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
case RISCV::SUBW:
case RISCV::MULW:
case RISCV::SLLIW:
+ case RISCV::GREVIW:
+ case RISCV::GORCIW:
// Result is already sign extended just remove the sext.w.
// NOTE: We only handle the nodes that are selected with hasAllWUsers.
ReplaceUses(N, N0.getNode());
@@ -2139,8 +2430,113 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
return false;
}
+// Optimize masked RVV pseudo instructions with a known all-ones mask to their
+// corresponding "unmasked" pseudo versions. The mask we're interested in will
+// take the form of a V0 physical register operand, with a glued
+// register-setting instruction.
+bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
+ const RISCV::RISCVMaskedPseudoInfo *I =
+ RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
+ if (!I)
+ return false;
+
+ unsigned MaskOpIdx = I->MaskOpIdx;
+
+ // Check that we're using V0 as a mask register.
+ if (!isa<RegisterSDNode>(N->getOperand(MaskOpIdx)) ||
+ cast<RegisterSDNode>(N->getOperand(MaskOpIdx))->getReg() != RISCV::V0)
+ return false;
+
+ // The glued user defines V0.
+ const auto *Glued = N->getGluedNode();
+
+ if (!Glued || Glued->getOpcode() != ISD::CopyToReg)
+ return false;
+
+ // Check that we're defining V0 as a mask register.
+ if (!isa<RegisterSDNode>(Glued->getOperand(1)) ||
+ cast<RegisterSDNode>(Glued->getOperand(1))->getReg() != RISCV::V0)
+ return false;
+
+ // Check the instruction defining V0; it needs to be a VMSET pseudo.
+ SDValue MaskSetter = Glued->getOperand(2);
+
+ const auto IsVMSet = [](unsigned Opc) {
+ return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
+ Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
+ Opc == RISCV::PseudoVMSET_M_B4 || Opc == RISCV::PseudoVMSET_M_B64 ||
+ Opc == RISCV::PseudoVMSET_M_B8;
+ };
+
+ // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
+ // undefined behaviour if it's the wrong bitwidth, so we could choose to
+ // assume that it's all-ones? Same applies to its VL.
+ if (!MaskSetter->isMachineOpcode() || !IsVMSet(MaskSetter.getMachineOpcode()))
+ return false;
+
+ // Retrieve the tail policy operand index, if any.
+ Optional<unsigned> TailPolicyOpIdx;
+ const RISCVInstrInfo &TII = *Subtarget->getInstrInfo();
+ const MCInstrDesc &MaskedMCID = TII.get(N->getMachineOpcode());
+
+ bool IsTA = true;
+ if (RISCVII::hasVecPolicyOp(MaskedMCID.TSFlags)) {
+ // The last operand of the pseudo is the policy op, but we might have a
+ // Glue operand last. We might also have a chain.
+ TailPolicyOpIdx = N->getNumOperands() - 1;
+ if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Glue)
+ (*TailPolicyOpIdx)--;
+ if (N->getOperand(*TailPolicyOpIdx).getValueType() == MVT::Other)
+ (*TailPolicyOpIdx)--;
+
+ if (!(N->getConstantOperandVal(*TailPolicyOpIdx) &
+ RISCVII::TAIL_AGNOSTIC)) {
+ // Keep the true-masked instruction when there is no unmasked TU
+ // instruction
+ if (I->UnmaskedTUPseudo == I->MaskedPseudo && !N->getOperand(0).isUndef())
+ return false;
+ // We can't use TA if the tie-operand is not IMPLICIT_DEF
+ if (!N->getOperand(0).isUndef())
+ IsTA = false;
+ }
+ }
+
+ unsigned Opc = IsTA ? I->UnmaskedPseudo : I->UnmaskedTUPseudo;
+
+ // Check that we're dropping the mask operand and any policy operand
+ // when we transform to this unmasked pseudo. Additionally, if this instruction
+ // is tail agnostic, the unmasked instruction should not have a merge op.
+ uint64_t TSFlags = TII.get(Opc).TSFlags;
+ assert((IsTA != RISCVII::hasMergeOp(TSFlags)) &&
+ RISCVII::hasDummyMaskOp(TSFlags) &&
+ !RISCVII::hasVecPolicyOp(TSFlags) &&
+ "Unexpected pseudo to transform to");
+ (void)TSFlags;
+
+ SmallVector<SDValue, 8> Ops;
+ // Skip the merge operand at index 0 if IsTA
+ for (unsigned I = IsTA, E = N->getNumOperands(); I != E; I++) {
+ // Skip the mask, the policy, and the Glue.
+ SDValue Op = N->getOperand(I);
+ if (I == MaskOpIdx || I == TailPolicyOpIdx ||
+ Op.getValueType() == MVT::Glue)
+ continue;
+ Ops.push_back(Op);
+ }
+
+ // Transitively apply any node glued to our new node.
+ if (auto *TGlued = Glued->getGluedNode())
+ Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
+
+ SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ ReplaceUses(N, Result);
+
+ return true;
+}
+
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
-FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM) {
- return new RISCVDAGToDAGISel(TM);
+FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new RISCVDAGToDAGISel(TM, OptLevel);
}