aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp')
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp505
1 files changed, 358 insertions, 147 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index cafce628cf6a..09b3ab96974c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -22,13 +22,18 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include <optional>
using namespace llvm;
#define DEBUG_TYPE "riscv-isel"
#define PASS_NAME "RISC-V DAG->DAG Pattern Instruction Selection"
+static cl::opt<bool> UsePseudoMovImm(
+ "riscv-use-rematerializable-movimm", cl::Hidden,
+ cl::desc("Use a rematerializable pseudoinstruction for 2 instruction "
+ "constant materialization"),
+ cl::init(false));
+
namespace llvm::RISCV {
#define GET_RISCVVSSEGTable_IMPL
#define GET_RISCVVLSEGTable_IMPL
@@ -61,8 +66,11 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
VT.isInteger() ? RISCVISD::VMV_V_X_VL : RISCVISD::VFMV_V_F_VL;
SDLoc DL(N);
SDValue VL = CurDAG->getRegister(RISCV::X0, Subtarget->getXLenVT());
- Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT),
- N->getOperand(0), VL);
+ SDValue Src = N->getOperand(0);
+ if (VT.isInteger())
+ Src = CurDAG->getNode(ISD::ANY_EXTEND, DL, Subtarget->getXLenVT(),
+ N->getOperand(0));
+ Result = CurDAG->getNode(Opc, DL, VT, CurDAG->getUNDEF(VT), Src, VL);
break;
}
case RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL: {
@@ -83,7 +91,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
// Create temporary stack for each expanding node.
SDValue StackSlot =
- CurDAG->CreateStackTemporary(TypeSize::Fixed(8), Align(4));
+ CurDAG->CreateStackTemporary(TypeSize::getFixed(8), Align(8));
int FI = cast<FrameIndexSDNode>(StackSlot.getNode())->getIndex();
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
@@ -91,7 +99,7 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
Lo = CurDAG->getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
SDValue OffsetSlot =
- CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
+ CurDAG->getMemBasePlusOffset(StackSlot, TypeSize::getFixed(4), DL);
Hi = CurDAG->getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4),
Align(8));
@@ -142,13 +150,25 @@ void RISCVDAGToDAGISel::PostprocessISelDAG() {
continue;
MadeChange |= doPeepholeSExtW(N);
- MadeChange |= doPeepholeMaskedRVV(N);
+
+ // FIXME: This is here only because the VMerge transform doesn't
+ // know how to handle masked true inputs. Once that has been moved
+ // to post-ISEL, this can be deleted as well.
+ MadeChange |= doPeepholeMaskedRVV(cast<MachineSDNode>(N));
}
CurDAG->setRoot(Dummy.getValue());
MadeChange |= doPeepholeMergeVVMFold();
+ // After we're done with everything else, convert IMPLICIT_DEF
+ // passthru operands to NoRegister. This is required to workaround
+ // an optimization deficiency in MachineCSE. This really should
+ // be merged back into each of the patterns (i.e. there's no good
+ // reason not to go directly to NoReg), but is being done this way
+ // to allow easy backporting.
+ MadeChange |= doPeepholeNoRegPassThru();
+
if (MadeChange)
CurDAG->RemoveDeadNodes();
}
@@ -184,28 +204,32 @@ static SDValue selectImmSeq(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
static SDValue selectImm(SelectionDAG *CurDAG, const SDLoc &DL, const MVT VT,
int64_t Imm, const RISCVSubtarget &Subtarget) {
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, Subtarget);
- // See if we can create this constant as (ADD (SLLI X, 32), X) where X is at
+ // Use a rematerializable pseudo instruction for short sequences if enabled.
+ if (Seq.size() == 2 && UsePseudoMovImm)
+ return SDValue(
+ CurDAG->getMachineNode(RISCV::PseudoMovImm, DL, VT,
+ CurDAG->getTargetConstant(Imm, DL, VT)),
+ 0);
+
+ // See if we can create this constant as (ADD (SLLI X, C), X) where X is at
// worst an LUI+ADDIW. This will require an extra register, but avoids a
// constant pool.
+ // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
+ // low and high 32 bits are the same and bit 31 and 63 are set.
if (Seq.size() > 3) {
- int64_t LoVal = SignExtend64<32>(Imm);
- int64_t HiVal = SignExtend64<32>(((uint64_t)Imm - (uint64_t)LoVal) >> 32);
- if (LoVal == HiVal) {
- RISCVMatInt::InstSeq SeqLo =
- RISCVMatInt::generateInstSeq(LoVal, Subtarget.getFeatureBits());
- if ((SeqLo.size() + 2) < Seq.size()) {
- SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
+ unsigned ShiftAmt, AddOpc;
+ RISCVMatInt::InstSeq SeqLo =
+ RISCVMatInt::generateTwoRegInstSeq(Imm, Subtarget, ShiftAmt, AddOpc);
+ if (!SeqLo.empty() && (SeqLo.size() + 2) < Seq.size()) {
+ SDValue Lo = selectImmSeq(CurDAG, DL, VT, SeqLo);
- SDValue SLLI = SDValue(
- CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
- CurDAG->getTargetConstant(32, DL, VT)),
- 0);
- return SDValue(CurDAG->getMachineNode(RISCV::ADD, DL, VT, Lo, SLLI),
- 0);
- }
+ SDValue SLLI = SDValue(
+ CurDAG->getMachineNode(RISCV::SLLI, DL, VT, Lo,
+ CurDAG->getTargetConstant(ShiftAmt, DL, VT)),
+ 0);
+ return SDValue(CurDAG->getMachineNode(AddOpc, DL, VT, Lo, SLLI), 0);
}
}
@@ -552,6 +576,12 @@ void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
SDValue VLOperand;
unsigned Opcode = RISCV::PseudoVSETVLI;
+ if (auto *C = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
+ const unsigned VLEN = Subtarget->getRealMinVLen();
+ if (VLEN == Subtarget->getRealMaxVLen())
+ if (VLEN / RISCVVType::getSEWLMULRatio(SEW, VLMul) == C->getZExtValue())
+ VLMax = true;
+ }
if (VLMax || isAllOnesConstant(Node->getOperand(1))) {
VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
Opcode = RISCV::PseudoVSETVLIX0;
@@ -808,7 +838,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
switch (Opcode) {
case ISD::Constant: {
- assert(VT == Subtarget->getXLenVT() && "Unexpected VT");
+ assert((VT == Subtarget->getXLenVT() || VT == MVT::i32) && "Unexpected VT");
auto *ConstNode = cast<ConstantSDNode>(Node);
if (ConstNode->isZero()) {
SDValue New =
@@ -832,26 +862,34 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
case ISD::ConstantFP: {
const APFloat &APF = cast<ConstantFPSDNode>(Node)->getValueAPF();
- int FPImm = static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(
- APF, VT);
+ auto [FPImm, NeedsFNeg] =
+ static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
+ VT);
if (FPImm >= 0) {
unsigned Opc;
+ unsigned FNegOpc;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Unexpected size");
case MVT::f16:
Opc = RISCV::FLI_H;
+ FNegOpc = RISCV::FSGNJN_H;
break;
case MVT::f32:
Opc = RISCV::FLI_S;
+ FNegOpc = RISCV::FSGNJN_S;
break;
case MVT::f64:
Opc = RISCV::FLI_D;
+ FNegOpc = RISCV::FSGNJN_D;
break;
}
-
SDNode *Res = CurDAG->getMachineNode(
Opc, DL, VT, CurDAG->getTargetConstant(FPImm, DL, XLenVT));
+ if (NeedsFNeg)
+ Res = CurDAG->getMachineNode(FNegOpc, DL, VT, SDValue(Res, 0),
+ SDValue(Res, 0));
+
ReplaceNode(Node, Res);
return;
}
@@ -866,10 +904,16 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Imm = selectImm(CurDAG, DL, XLenVT, APF.bitcastToAPInt().getSExtValue(),
*Subtarget);
+ bool HasZdinx = Subtarget->hasStdExtZdinx();
+ bool Is64Bit = Subtarget->is64Bit();
unsigned Opc;
switch (VT.SimpleTy) {
default:
llvm_unreachable("Unexpected size");
+ case MVT::bf16:
+ assert(Subtarget->hasStdExtZfbfmin());
+ Opc = RISCV::FMV_H_X;
+ break;
case MVT::f16:
Opc =
Subtarget->hasStdExtZhinxOrZhinxmin() ? RISCV::COPY : RISCV::FMV_H_X;
@@ -881,20 +925,29 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
// For RV32, we can't move from a GPR, we need to convert instead. This
// should only happen for +0.0 and -0.0.
assert((Subtarget->is64Bit() || APF.isZero()) && "Unexpected constant");
- bool HasZdinx = Subtarget->hasStdExtZdinx();
- if (Subtarget->is64Bit())
+ if (Is64Bit)
Opc = HasZdinx ? RISCV::COPY : RISCV::FMV_D_X;
else
Opc = HasZdinx ? RISCV::FCVT_D_W_IN32X : RISCV::FCVT_D_W;
break;
}
- SDNode *Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
+ SDNode *Res;
+ if (Opc == RISCV::FCVT_D_W_IN32X || Opc == RISCV::FCVT_D_W)
+ Res = CurDAG->getMachineNode(
+ Opc, DL, VT, Imm,
+ CurDAG->getTargetConstant(RISCVFPRndMode::RNE, DL, XLenVT));
+ else
+ Res = CurDAG->getMachineNode(Opc, DL, VT, Imm);
// For f64 -0.0, we need to insert a fneg.d idiom.
- if (NegZeroF64)
- Res = CurDAG->getMachineNode(RISCV::FSGNJN_D, DL, VT, SDValue(Res, 0),
- SDValue(Res, 0));
+ if (NegZeroF64) {
+ Opc = RISCV::FSGNJN_D;
+ if (HasZdinx)
+ Opc = Is64Bit ? RISCV::FSGNJN_D_INX : RISCV::FSGNJN_D_IN32X;
+ Res =
+ CurDAG->getMachineNode(Opc, DL, VT, SDValue(Res, 0), SDValue(Res, 0));
+ }
ReplaceNode(Node, Res);
return;
@@ -991,12 +1044,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
unsigned TrailingOnes = llvm::countr_one(Mask);
if (ShAmt >= TrailingOnes)
break;
- // If the mask has 32 trailing ones, use SRLIW.
+ // If the mask has 32 trailing ones, use SRLI on RV32 or SRLIW on RV64.
if (TrailingOnes == 32) {
- SDNode *SRLIW =
- CurDAG->getMachineNode(RISCV::SRLIW, DL, VT, N0->getOperand(0),
- CurDAG->getTargetConstant(ShAmt, DL, VT));
- ReplaceNode(Node, SRLIW);
+ SDNode *SRLI = CurDAG->getMachineNode(
+ Subtarget->is64Bit() ? RISCV::SRLIW : RISCV::SRLI, DL, VT,
+ N0->getOperand(0), CurDAG->getTargetConstant(ShAmt, DL, VT));
+ ReplaceNode(Node, SRLI);
return;
}
@@ -2082,8 +2135,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad())
break;
- SmallVector<SDValue> Operands =
- {CurDAG->getUNDEF(VT), Ld->getBasePtr()};
+ SmallVector<SDValue> Operands = {
+ SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT), 0),
+ Ld->getBasePtr()};
if (IsStrided)
Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT));
uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC;
@@ -2141,12 +2195,13 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
}
bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
- const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
+ const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
+ std::vector<SDValue> &OutOps) {
// Always produce a register and immediate operand, as expected by
// RISCVAsmPrinter::PrintAsmMemoryOperand.
switch (ConstraintID) {
- case InlineAsm::Constraint_o:
- case InlineAsm::Constraint_m: {
+ case InlineAsm::ConstraintCode::o:
+ case InlineAsm::ConstraintCode::m: {
SDValue Op0, Op1;
bool Found = SelectAddrRegImm(Op, Op0, Op1);
assert(Found && "SelectAddrRegImm should always succeed");
@@ -2155,7 +2210,7 @@ bool RISCVDAGToDAGISel::SelectInlineAsmMemoryOperand(
OutOps.push_back(Op1);
return false;
}
- case InlineAsm::Constraint_A:
+ case InlineAsm::ConstraintCode::A:
OutOps.push_back(Op);
OutOps.push_back(
CurDAG->getTargetConstant(0, SDLoc(Op), Subtarget->getXLenVT()));
@@ -2205,7 +2260,8 @@ bool RISCVDAGToDAGISel::SelectFrameAddrRegImm(SDValue Addr, SDValue &Base,
// Fold constant addresses.
static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
const MVT VT, const RISCVSubtarget *Subtarget,
- SDValue Addr, SDValue &Base, SDValue &Offset) {
+ SDValue Addr, SDValue &Base, SDValue &Offset,
+ bool IsPrefetch = false) {
if (!isa<ConstantSDNode>(Addr))
return false;
@@ -2217,6 +2273,9 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
int64_t Lo12 = SignExtend64<12>(CVal);
int64_t Hi = (uint64_t)CVal - (uint64_t)Lo12;
if (!Subtarget->is64Bit() || isInt<32>(Hi)) {
+ if (IsPrefetch && (Lo12 & 0b11111) != 0)
+ return false;
+
if (Hi) {
int64_t Hi20 = (Hi >> 12) & 0xfffff;
Base = SDValue(
@@ -2231,14 +2290,15 @@ static bool selectConstantAddr(SelectionDAG *CurDAG, const SDLoc &DL,
}
// Ask how constant materialization would handle this constant.
- RISCVMatInt::InstSeq Seq =
- RISCVMatInt::generateInstSeq(CVal, Subtarget->getFeatureBits());
+ RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(CVal, *Subtarget);
// If the last instruction would be an ADDI, we can fold its immediate and
// emit the rest of the sequence as the base.
if (Seq.back().getOpcode() != RISCV::ADDI)
return false;
Lo12 = Seq.back().getImm();
+ if (IsPrefetch && (Lo12 & 0b11111) != 0)
+ return false;
// Drop the last instruction.
Seq.pop_back();
@@ -2419,14 +2479,85 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
return true;
}
+/// Similar to SelectAddrRegImm, except that the least significant 5 bits of
+/// Offset should be all zeros.
+bool RISCVDAGToDAGISel::SelectAddrRegImmLsb00000(SDValue Addr, SDValue &Base,
+ SDValue &Offset) {
+ if (SelectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ SDLoc DL(Addr);
+ MVT VT = Addr.getSimpleValueType();
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ if (isInt<12>(CVal)) {
+ Base = Addr.getOperand(0);
+
+ // Early-out if not a valid offset.
+ if ((CVal & 0b11111) != 0) {
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+ }
+
+ if (auto *FIN = dyn_cast<FrameIndexSDNode>(Base))
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), VT);
+ Offset = CurDAG->getTargetConstant(CVal, DL, VT);
+ return true;
+ }
+ }
+
+ // Handle ADD with large immediates.
+ if (Addr.getOpcode() == ISD::ADD && isa<ConstantSDNode>(Addr.getOperand(1))) {
+ int64_t CVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+ assert(!(isInt<12>(CVal) && isInt<12>(CVal)) &&
+ "simm12 not already handled?");
+
+ // Handle immediates in the range [-4096,-2049] or [2017, 4065]. We can save
+ // one instruction by folding adjustment (-2048 or 2016) into the address.
+ if ((-2049 >= CVal && CVal >= -4096) || (4065 >= CVal && CVal >= 2017)) {
+ int64_t Adj = CVal < 0 ? -2048 : 2016;
+ int64_t AdjustedOffset = CVal - Adj;
+ Base = SDValue(CurDAG->getMachineNode(
+ RISCV::ADDI, DL, VT, Addr.getOperand(0),
+ CurDAG->getTargetConstant(AdjustedOffset, DL, VT)),
+ 0);
+ Offset = CurDAG->getTargetConstant(Adj, DL, VT);
+ return true;
+ }
+
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr.getOperand(1), Base,
+ Offset, true)) {
+ // Insert an ADD instruction with the materialized Hi52 bits.
+ Base = SDValue(
+ CurDAG->getMachineNode(RISCV::ADD, DL, VT, Addr.getOperand(0), Base),
+ 0);
+ return true;
+ }
+ }
+
+ if (selectConstantAddr(CurDAG, DL, VT, Subtarget, Addr, Base, Offset, true))
+ return true;
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, DL, VT);
+ return true;
+}
+
bool RISCVDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
SDValue &ShAmt) {
ShAmt = N;
+ // Peek through zext.
+ if (ShAmt->getOpcode() == ISD::ZERO_EXTEND)
+ ShAmt = ShAmt.getOperand(0);
+
// Shift instructions on RISC-V only read the lower 5 or 6 bits of the shift
// amount. If there is an AND on the shift amount, we can bypass it if it
// doesn't affect any of those bits.
- if (ShAmt.getOpcode() == ISD::AND && isa<ConstantSDNode>(ShAmt.getOperand(1))) {
+ if (ShAmt.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(ShAmt.getOperand(1))) {
const APInt &AndMask = ShAmt.getConstantOperandAPInt(1);
// Since the max shift amount is a power of 2 we can subtract 1 to make a
@@ -2729,6 +2860,36 @@ bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
return false;
}
+static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
+ unsigned Bits,
+ const TargetInstrInfo *TII) {
+ unsigned MCOpcode = RISCV::getRVVMCOpcode(User->getMachineOpcode());
+
+ if (!MCOpcode)
+ return false;
+
+ const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
+ const uint64_t TSFlags = MCID.TSFlags;
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ assert(RISCVII::hasVLOp(TSFlags));
+
+ bool HasGlueOp = User->getGluedNode() != nullptr;
+ unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
+ bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
+ bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
+ unsigned VLIdx =
+ User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
+ const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
+
+ if (UserOpNo == VLIdx)
+ return false;
+
+ auto NumDemandedBits =
+ RISCV::getVectorLowDemandedScalarBits(MCOpcode, Log2SEW);
+ return NumDemandedBits && Bits >= *NumDemandedBits;
+}
+
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
@@ -2751,6 +2912,11 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
+ // The PatFrags that call this may run before RISCVGenDAGISel.inc has checked
+ // the VT. Ensure the type is scalar to avoid wasting time on vectors.
+ if (Depth == 0 && !Node->getValueType(0).isScalarInteger())
+ return false;
+
for (auto UI = Node->use_begin(), UE = Node->use_end(); UI != UE; ++UI) {
SDNode *User = *UI;
// Users of this node should have already been instruction selected
@@ -2760,6 +2926,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
// TODO: Add more opcodes?
switch (User->getMachineOpcode()) {
default:
+ if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
+ break;
return false;
case RISCV::ADDW:
case RISCV::ADDIW:
@@ -2937,27 +3105,41 @@ bool RISCVDAGToDAGISel::selectVLOp(SDValue N, SDValue &VL) {
return true;
}
+static SDValue findVSplat(SDValue N) {
+ if (N.getOpcode() == ISD::INSERT_SUBVECTOR) {
+ if (!N.getOperand(0).isUndef())
+ return SDValue();
+ N = N.getOperand(1);
+ }
+ SDValue Splat = N;
+ if ((Splat.getOpcode() != RISCVISD::VMV_V_X_VL &&
+ Splat.getOpcode() != RISCVISD::VMV_S_X_VL) ||
+ !Splat.getOperand(0).isUndef())
+ return SDValue();
+ assert(Splat.getNumOperands() == 3 && "Unexpected number of operands");
+ return Splat;
+}
+
bool RISCVDAGToDAGISel::selectVSplat(SDValue N, SDValue &SplatVal) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef())
+ SDValue Splat = findVSplat(N);
+ if (!Splat)
return false;
- assert(N.getNumOperands() == 3 && "Unexpected number of operands");
- SplatVal = N.getOperand(1);
+
+ SplatVal = Splat.getOperand(1);
return true;
}
-using ValidateFn = bool (*)(int64_t);
-
-static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
- SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget,
- ValidateFn ValidateImm) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
- !isa<ConstantSDNode>(N.getOperand(1)))
+static bool selectVSplatImmHelper(SDValue N, SDValue &SplatVal,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ std::function<bool(int64_t)> ValidateImm) {
+ SDValue Splat = findVSplat(N);
+ if (!Splat || !isa<ConstantSDNode>(Splat.getOperand(1)))
return false;
- assert(N.getNumOperands() == 3 && "Unexpected number of operands");
- int64_t SplatImm =
- cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
+ const unsigned SplatEltSize = Splat.getScalarValueSizeInBits();
+ assert(Subtarget.getXLenVT() == Splat.getOperand(1).getSimpleValueType() &&
+ "Unexpected splat operand type");
// The semantics of RISCVISD::VMV_V_X_VL is that when the operand
// type is wider than the resulting vector element type: an implicit
@@ -2966,34 +3148,31 @@ static bool selectVSplatSimmHelper(SDValue N, SDValue &SplatVal,
// any zero-extended immediate.
// For example, we wish to match (i8 -1) -> (XLenVT 255) as a simm5 by first
// sign-extending to (XLenVT -1).
- MVT XLenVT = Subtarget.getXLenVT();
- assert(XLenVT == N.getOperand(1).getSimpleValueType() &&
- "Unexpected splat operand type");
- MVT EltVT = N.getSimpleValueType().getVectorElementType();
- if (EltVT.bitsLT(XLenVT))
- SplatImm = SignExtend64(SplatImm, EltVT.getSizeInBits());
+ APInt SplatConst = Splat.getConstantOperandAPInt(1).sextOrTrunc(SplatEltSize);
+
+ int64_t SplatImm = SplatConst.getSExtValue();
if (!ValidateImm(SplatImm))
return false;
- SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), XLenVT);
+ SplatVal = DAG.getTargetConstant(SplatImm, SDLoc(N), Subtarget.getXLenVT());
return true;
}
bool RISCVDAGToDAGISel::selectVSplatSimm5(SDValue N, SDValue &SplatVal) {
- return selectVSplatSimmHelper(N, SplatVal, *CurDAG, *Subtarget,
- [](int64_t Imm) { return isInt<5>(Imm); });
+ return selectVSplatImmHelper(N, SplatVal, *CurDAG, *Subtarget,
+ [](int64_t Imm) { return isInt<5>(Imm); });
}
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal) {
- return selectVSplatSimmHelper(
+ return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget,
[](int64_t Imm) { return (isInt<5>(Imm) && Imm != -16) || Imm == 16; });
}
bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
SDValue &SplatVal) {
- return selectVSplatSimmHelper(
+ return selectVSplatImmHelper(
N, SplatVal, *CurDAG, *Subtarget, [](int64_t Imm) {
return Imm != 0 && ((isInt<5>(Imm) && Imm != -16) || Imm == 16);
});
@@ -3001,29 +3180,34 @@ bool RISCVDAGToDAGISel::selectVSplatSimm5Plus1NonZero(SDValue N,
bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits,
SDValue &SplatVal) {
- if (N.getOpcode() != RISCVISD::VMV_V_X_VL || !N.getOperand(0).isUndef() ||
- !isa<ConstantSDNode>(N.getOperand(1)))
- return false;
-
- int64_t SplatImm =
- cast<ConstantSDNode>(N.getOperand(1))->getSExtValue();
-
- if (!isUIntN(Bits, SplatImm))
- return false;
-
- SplatVal =
- CurDAG->getTargetConstant(SplatImm, SDLoc(N), Subtarget->getXLenVT());
-
- return true;
+ return selectVSplatImmHelper(
+ N, SplatVal, *CurDAG, *Subtarget,
+ [Bits](int64_t Imm) { return isUIntN(Bits, Imm); });
}
-bool RISCVDAGToDAGISel::selectExtOneUseVSplat(SDValue N, SDValue &SplatVal) {
- if (N->getOpcode() == ISD::SIGN_EXTEND ||
- N->getOpcode() == ISD::ZERO_EXTEND) {
- if (!N.hasOneUse())
+bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) {
+ // Truncates are custom lowered during legalization.
+ auto IsTrunc = [this](SDValue N) {
+ if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL)
+ return false;
+ SDValue VL;
+ selectVLOp(N->getOperand(2), VL);
+ // Any vmset_vl is ok, since any bits past VL are undefined and we can
+ // assume they are set.
+ return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL &&
+ isa<ConstantSDNode>(VL) &&
+ cast<ConstantSDNode>(VL)->getSExtValue() == RISCV::VLMaxSentinel;
+ };
+
+ // We can have multiple nested truncates, so unravel them all if needed.
+ while (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) {
+ if (!N.hasOneUse() ||
+ N.getValueType().getSizeInBits().getKnownMinValue() < 8)
return false;
N = N->getOperand(0);
}
+
return selectVSplat(N, SplatVal);
}
@@ -3038,8 +3222,12 @@ bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
MVT VT = CFP->getSimpleValueType(0);
- if (static_cast<const RISCVTargetLowering *>(TLI)->getLegalZfaFPImm(APF,
- VT) >= 0)
+ // Even if this FPImm requires an additional FNEG (i.e. the second element of
+ // the returned pair is true) we still prefer FLI + FNEG over immediate
+ // materialization as the latter might generate a longer instruction sequence.
+ if (static_cast<const RISCVTargetLowering *>(TLI)
+ ->getLegalZfaFPImm(APF, VT)
+ .first >= 0)
return false;
MVT XLenVT = Subtarget->getXLenVT();
@@ -3125,6 +3313,9 @@ bool RISCVDAGToDAGISel::doPeepholeSExtW(SDNode *N) {
case RISCV::TH_MULAH:
case RISCV::TH_MULSW:
case RISCV::TH_MULSH:
+ if (N0.getValueType() == MVT::i32)
+ break;
+
// Result is already sign extended just remove the sext.w.
// NOTE: We only handle the nodes that are selected with hasAllWUsers.
ReplaceUses(N, N0.getNode());
@@ -3154,6 +3345,12 @@ static bool usesAllOnesMask(SDValue MaskOp, SDValue GlueOp) {
// Check the instruction defining V0; it needs to be a VMSET pseudo.
SDValue MaskSetter = Glued->getOperand(2);
+ // Sometimes the VMSET is wrapped in a COPY_TO_REGCLASS, e.g. if the mask came
+ // from an extract_subvector or insert_subvector.
+ if (MaskSetter->isMachineOpcode() &&
+ MaskSetter->getMachineOpcode() == RISCV::COPY_TO_REGCLASS)
+ MaskSetter = MaskSetter->getOperand(0);
+
const auto IsVMSet = [](unsigned Opc) {
return Opc == RISCV::PseudoVMSET_M_B1 || Opc == RISCV::PseudoVMSET_M_B16 ||
Opc == RISCV::PseudoVMSET_M_B2 || Opc == RISCV::PseudoVMSET_M_B32 ||
@@ -3183,7 +3380,7 @@ static bool isImplicitDef(SDValue V) {
// corresponding "unmasked" pseudo versions. The mask we're interested in will
// take the form of a V0 physical register operand, with a glued
// register-setting instruction.
-bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
+bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(MachineSDNode *N) {
const RISCV::RISCVMaskedPseudoInfo *I =
RISCV::getMaskedPseudoInfo(N->getMachineOpcode());
if (!I)
@@ -3222,7 +3419,12 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
if (auto *TGlued = Glued->getGluedNode())
Ops.push_back(SDValue(TGlued, TGlued->getNumValues() - 1));
- SDNode *Result = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ MachineSDNode *Result =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+
+ if (!N->memoperands_empty())
+ CurDAG->setNodeMemRefs(Result, N->memoperands());
+
Result->setFlags(N->getFlags());
ReplaceUses(N, Result);
@@ -3230,21 +3432,11 @@ bool RISCVDAGToDAGISel::doPeepholeMaskedRVV(SDNode *N) {
}
static bool IsVMerge(SDNode *N) {
- unsigned Opc = N->getMachineOpcode();
- return Opc == RISCV::PseudoVMERGE_VVM_MF8 ||
- Opc == RISCV::PseudoVMERGE_VVM_MF4 ||
- Opc == RISCV::PseudoVMERGE_VVM_MF2 ||
- Opc == RISCV::PseudoVMERGE_VVM_M1 ||
- Opc == RISCV::PseudoVMERGE_VVM_M2 ||
- Opc == RISCV::PseudoVMERGE_VVM_M4 || Opc == RISCV::PseudoVMERGE_VVM_M8;
+ return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMERGE_VVM;
}
static bool IsVMv(SDNode *N) {
- unsigned Opc = N->getMachineOpcode();
- return Opc == RISCV::PseudoVMV_V_V_MF8 || Opc == RISCV::PseudoVMV_V_V_MF4 ||
- Opc == RISCV::PseudoVMV_V_V_MF2 || Opc == RISCV::PseudoVMV_V_V_M1 ||
- Opc == RISCV::PseudoVMV_V_V_M2 || Opc == RISCV::PseudoVMV_V_V_M4 ||
- Opc == RISCV::PseudoVMV_V_V_M8;
+ return RISCV::getRVVMCOpcode(N->getMachineOpcode()) == RISCV::VMV_V_V;
}
static unsigned GetVMSetForLMul(RISCVII::VLMUL LMUL) {
@@ -3336,6 +3528,11 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
if (!Info)
return false;
+ // When Mask is not a true mask, this transformation is illegal for some
+ // operations whose results are affected by mask, like viota.m.
+ if (Info->MaskAffectsResult && Mask && !usesAllOnesMask(Mask, Glue))
+ return false;
+
if (HasTiedDest && !isImplicitDef(True->getOperand(0))) {
// The vmerge instruction must be TU.
// FIXME: This could be relaxed, but we need to handle the policy for the
@@ -3414,6 +3611,7 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// Because N and True must have the same merge operand (or True's operand is
// implicit_def), the "effective" body is the minimum of their VLs.
+ SDValue OrigVL = VL;
VL = GetMinVL(TrueVL, VL);
if (!VL)
return false;
@@ -3461,7 +3659,17 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
"Expected instructions with mask have a tied dest.");
#endif
- uint64_t Policy = isImplicitDef(Merge) ? RISCVII::TAIL_AGNOSTIC : /*TUMU*/ 0;
+ // Use a tumu policy, relaxing it to tail agnostic provided that the merge
+ // operand is undefined.
+ //
+ // However, if the VL became smaller than what the vmerge had originally, then
+ // elements past VL that were previously in the vmerge's body will have moved
+ // to the tail. In that case we always need to use tail undisturbed to
+ // preserve them.
+ bool MergeVLShrunk = VL != OrigVL;
+ uint64_t Policy = (isImplicitDef(Merge) && !MergeVLShrunk)
+ ? RISCVII::TAIL_AGNOSTIC
+ : /*TUMU*/ 0;
SDValue PolicyOp =
CurDAG->getTargetConstant(Policy, DL, Subtarget->getXLenVT());
@@ -3492,10 +3700,13 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
// Add the glue for the CopyToReg of mask->v0.
Ops.push_back(Glue);
- SDNode *Result =
+ MachineSDNode *Result =
CurDAG->getMachineNode(MaskedOpc, DL, True->getVTList(), Ops);
Result->setFlags(True->getFlags());
+ if (!cast<MachineSDNode>(True)->memoperands_empty())
+ CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(True)->memoperands());
+
// Replace vmerge.vvm node by Result.
ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
@@ -3503,46 +3714,30 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
for (unsigned Idx = 1; Idx < True->getNumValues(); ++Idx)
ReplaceUses(True.getValue(Idx), SDValue(Result, Idx));
- // Try to transform Result to unmasked intrinsic.
- doPeepholeMaskedRVV(Result);
return true;
}
-// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
-// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
-bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
-#define CASE_VMERGE_TO_VMV(lmul) \
- case RISCV::PseudoVMERGE_VVM_##lmul: \
- NewOpc = RISCV::PseudoVMV_V_V_##lmul; \
- break;
- unsigned NewOpc;
- switch (N->getMachineOpcode()) {
- default:
- llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
- CASE_VMERGE_TO_VMV(MF8)
- CASE_VMERGE_TO_VMV(MF4)
- CASE_VMERGE_TO_VMV(MF2)
- CASE_VMERGE_TO_VMV(M1)
- CASE_VMERGE_TO_VMV(M2)
- CASE_VMERGE_TO_VMV(M4)
- CASE_VMERGE_TO_VMV(M8)
- }
+bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
+ bool MadeChange = false;
+ SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
- if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
- return false;
+ while (Position != CurDAG->allnodes_begin()) {
+ SDNode *N = &*--Position;
+ if (N->use_empty() || !N->isMachineOpcode())
+ continue;
- SDLoc DL(N);
- SDValue PolicyOp =
- CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
- SDNode *Result = CurDAG->getMachineNode(
- NewOpc, DL, N->getValueType(0),
- {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
- PolicyOp});
- ReplaceUses(N, Result);
- return true;
+ if (IsVMerge(N) || IsVMv(N))
+ MadeChange |= performCombineVMergeAndVOps(N);
+ }
+ return MadeChange;
}
-bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
+/// If our passthru is an implicit_def, use noreg instead. This side
+/// steps issues with MachineCSE not being able to CSE expressions with
+/// IMPLICIT_DEF operands while preserving the semantic intent. See
+/// pr64282 for context. Note that this transform is the last one
+/// performed at ISEL DAG to DAG.
+bool RISCVDAGToDAGISel::doPeepholeNoRegPassThru() {
bool MadeChange = false;
SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
@@ -3551,18 +3746,34 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
if (N->use_empty() || !N->isMachineOpcode())
continue;
- if (IsVMerge(N) || IsVMv(N))
- MadeChange |= performCombineVMergeAndVOps(N);
- if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
- MadeChange |= performVMergeToVMv(N);
+ const unsigned Opc = N->getMachineOpcode();
+ if (!RISCVVPseudosTable::getPseudoInfo(Opc) ||
+ !RISCVII::isFirstDefTiedToFirstUse(TII->get(Opc)) ||
+ !isImplicitDef(N->getOperand(0)))
+ continue;
+
+ SmallVector<SDValue> Ops;
+ Ops.push_back(CurDAG->getRegister(RISCV::NoRegister, N->getValueType(0)));
+ for (unsigned I = 1, E = N->getNumOperands(); I != E; I++) {
+ SDValue Op = N->getOperand(I);
+ Ops.push_back(Op);
+ }
+
+ MachineSDNode *Result =
+ CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
+ Result->setFlags(N->getFlags());
+ CurDAG->setNodeMemRefs(Result, cast<MachineSDNode>(N)->memoperands());
+ ReplaceUses(N, Result);
+ MadeChange = true;
}
return MadeChange;
}
+
// This pass converts a legalized DAG into a RISCV-specific DAG, ready
// for instruction scheduling.
FunctionPass *llvm::createRISCVISelDag(RISCVTargetMachine &TM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOptLevel OptLevel) {
return new RISCVDAGToDAGISel(TM, OptLevel);
}