author     Dimitry Andric <dim@FreeBSD.org>   2022-07-03 14:10:23 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2022-07-03 14:10:23 +0000
commit     145449b1e420787bb99721a429341fa6be3adfb6
tree       1d56ae694a6de602e348dd80165cf881a36600ed
parent     ecbca9f5fb7d7613d2b94982c4825eb0d33d6842
Vendor import of llvm-project main llvmorg-15-init-15358-g53dc0f107877.
Ref: vendor/llvm-project/llvmorg-15-init-15358-g53dc0f107877
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVISelLowering.cpp')
 -rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4004
 1 file changed, 2692 insertions(+), 1312 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 97d24c8e9c0b..ff645dea4e7a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -112,17 +112,24 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasVInstructions()) {
auto addRegClassForRVV = [this](MVT VT) {
+ // Disable the smallest fractional LMUL types if ELEN is less than
+ // RVVBitsPerBlock.
+ unsigned MinElts = RISCV::RVVBitsPerBlock / Subtarget.getELEN();
+ if (VT.getVectorMinNumElements() < MinElts)
+ return;
+
unsigned Size = VT.getSizeInBits().getKnownMinValue();
- assert(Size <= 512 && isPowerOf2_32(Size));
const TargetRegisterClass *RC;
- if (Size <= 64)
+ if (Size <= RISCV::RVVBitsPerBlock)
RC = &RISCV::VRRegClass;
- else if (Size == 128)
+ else if (Size == 2 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM2RegClass;
- else if (Size == 256)
+ else if (Size == 4 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM4RegClass;
- else
+ else if (Size == 8 * RISCV::RVVBitsPerBlock)
RC = &RISCV::VRM8RegClass;
+ else
+ llvm_unreachable("Unexpected size");
addRegisterClass(VT, RC);
};
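[Editorial note, not part of the patch] The new MinElts guard above rejects types whose fractional LMUL cannot be encoded when ELEN is below RVVBitsPerBlock. A minimal sketch of the arithmetic, assuming a Zve32* subtarget (ELEN == 32); the names mirror the patch, but the snippet itself is illustrative:

    // Illustrative sketch only. On Zve32* with RISCV::RVVBitsPerBlock == 64:
    constexpr unsigned RVVBitsPerBlock = 64;   // RISCV::RVVBitsPerBlock
    unsigned ELEN = 32;                        // Subtarget.getELEN()
    unsigned MinElts = RVVBitsPerBlock / ELEN; // == 2
    // nxv1i8 has a minimum element count of 1; since 1 < 2, addRegClassForRVV
    // returns early and the type stays illegal. nxv2i8 (2 >= 2) proceeds and
    // lands in VRRegClass.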
@@ -170,8 +177,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setStackPointerRegisterToSaveRestore(RISCV::X2);
- for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
- setLoadExtAction(N, XLenVT, MVT::i1, Promote);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, XLenVT,
+ MVT::i1, Promote);
// TODO: add all necessary setOperationAction calls.
setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);
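[Editorial note] Most hunks in this import mechanically collapse runs of set*Action calls into single calls taking lists, as above. A simplified sketch of the ArrayRef-based overload shape this relies on; the real declarations live in TargetLoweringBase and may differ in detail:

    // Sketch: the list-taking overloads just forward to the scalar forms.
    void setLoadExtAction(ArrayRef<unsigned> ExtTypes, MVT ValVT, MVT MemVT,
                          LegalizeAction Action) {
      for (unsigned ExtType : ExtTypes)
        setLoadExtAction(ExtType, ValVT, MemVT, Action);
    }
    void setOperationAction(ArrayRef<unsigned> Ops, MVT VT,
                            LegalizeAction Action) {
      for (unsigned Op : Ops)
        setOperationAction(Op, VT, Action);
    }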
@@ -181,100 +188,75 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
setOperationAction(ISD::SELECT_CC, XLenVT, Expand);
- setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
- setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction({ISD::STACKSAVE, ISD::STACKRESTORE}, MVT::Other, Expand);
setOperationAction(ISD::VASTART, MVT::Other, Custom);
- setOperationAction(ISD::VAARG, MVT::Other, Expand);
- setOperationAction(ISD::VACOPY, MVT::Other, Expand);
- setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction({ISD::VAARG, ISD::VACOPY, ISD::VAEND}, MVT::Other, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- if (!Subtarget.hasStdExtZbb()) {
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
- }
+
+ setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
+
+ if (!Subtarget.hasStdExtZbb())
+ setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::i8, MVT::i16}, Expand);
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::ADD, MVT::i32, Custom);
- setOperationAction(ISD::SUB, MVT::i32, Custom);
- setOperationAction(ISD::SHL, MVT::i32, Custom);
- setOperationAction(ISD::SRA, MVT::i32, Custom);
- setOperationAction(ISD::SRL, MVT::i32, Custom);
-
- setOperationAction(ISD::UADDO, MVT::i32, Custom);
- setOperationAction(ISD::USUBO, MVT::i32, Custom);
- setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
- setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
+ setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);
+
+ setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::SRA, ISD::SRL},
+ MVT::i32, Custom);
+
+ setOperationAction({ISD::UADDO, ISD::USUBO, ISD::UADDSAT, ISD::USUBSAT},
+ MVT::i32, Custom);
} else {
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
+ setLibcallName(
+ {RTLIB::SHL_I128, RTLIB::SRL_I128, RTLIB::SRA_I128, RTLIB::MUL_I128},
+ nullptr);
setLibcallName(RTLIB::MULO_I64, nullptr);
}
if (!Subtarget.hasStdExtM()) {
- setOperationAction(ISD::MUL, XLenVT, Expand);
- setOperationAction(ISD::MULHS, XLenVT, Expand);
- setOperationAction(ISD::MULHU, XLenVT, Expand);
- setOperationAction(ISD::SDIV, XLenVT, Expand);
- setOperationAction(ISD::UDIV, XLenVT, Expand);
- setOperationAction(ISD::SREM, XLenVT, Expand);
- setOperationAction(ISD::UREM, XLenVT, Expand);
+ setOperationAction({ISD::MUL, ISD::MULHS, ISD::MULHU, ISD::SDIV, ISD::UDIV,
+ ISD::SREM, ISD::UREM},
+ XLenVT, Expand);
} else {
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::MUL, MVT::i32, Custom);
- setOperationAction(ISD::MUL, MVT::i128, Custom);
-
- setOperationAction(ISD::SDIV, MVT::i8, Custom);
- setOperationAction(ISD::UDIV, MVT::i8, Custom);
- setOperationAction(ISD::UREM, MVT::i8, Custom);
- setOperationAction(ISD::SDIV, MVT::i16, Custom);
- setOperationAction(ISD::UDIV, MVT::i16, Custom);
- setOperationAction(ISD::UREM, MVT::i16, Custom);
- setOperationAction(ISD::SDIV, MVT::i32, Custom);
- setOperationAction(ISD::UDIV, MVT::i32, Custom);
- setOperationAction(ISD::UREM, MVT::i32, Custom);
+ setOperationAction(ISD::MUL, {MVT::i32, MVT::i128}, Custom);
+
+ setOperationAction({ISD::SDIV, ISD::UDIV, ISD::UREM},
+ {MVT::i8, MVT::i16, MVT::i32}, Custom);
} else {
setOperationAction(ISD::MUL, MVT::i64, Custom);
}
}
- setOperationAction(ISD::SDIVREM, XLenVT, Expand);
- setOperationAction(ISD::UDIVREM, XLenVT, Expand);
- setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
- setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
+ setOperationAction(
+ {ISD::SDIVREM, ISD::UDIVREM, ISD::SMUL_LOHI, ISD::UMUL_LOHI}, XLenVT,
+ Expand);
- setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
- setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
- setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
+ setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, XLenVT,
+ Custom);
if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
Subtarget.hasStdExtZbkb()) {
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::ROTL, MVT::i32, Custom);
- setOperationAction(ISD::ROTR, MVT::i32, Custom);
- }
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::ROTL, ISD::ROTR}, MVT::i32, Custom);
} else {
- setOperationAction(ISD::ROTL, XLenVT, Expand);
- setOperationAction(ISD::ROTR, XLenVT, Expand);
+ setOperationAction({ISD::ROTL, ISD::ROTR}, XLenVT, Expand);
}
if (Subtarget.hasStdExtZbp()) {
// Custom lower bswap/bitreverse so we can convert them to GREVI to enable
// more combining.
- setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
- setOperationAction(ISD::BSWAP, XLenVT, Custom);
- setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, XLenVT, Custom);
+
// BSWAP i8 doesn't exist.
- setOperationAction(ISD::BITREVERSE, MVT::i16, Custom);
- setOperationAction(ISD::BSWAP, MVT::i16, Custom);
+ setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
- setOperationAction(ISD::BSWAP, MVT::i32, Custom);
- }
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i16, Custom);
+
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::BITREVERSE, ISD::BSWAP}, MVT::i32, Custom);
} else {
// With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
// pattern match it directly in isel.
@@ -288,36 +270,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtZbb()) {
- setOperationAction(ISD::SMIN, XLenVT, Legal);
- setOperationAction(ISD::SMAX, XLenVT, Legal);
- setOperationAction(ISD::UMIN, XLenVT, Legal);
- setOperationAction(ISD::UMAX, XLenVT, Legal);
+ setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, XLenVT,
+ Legal);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::CTTZ, MVT::i32, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
- setOperationAction(ISD::CTLZ, MVT::i32, Custom);
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
- }
+ if (Subtarget.is64Bit())
+ setOperationAction(
+ {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF},
+ MVT::i32, Custom);
} else {
- setOperationAction(ISD::CTTZ, XLenVT, Expand);
- setOperationAction(ISD::CTLZ, XLenVT, Expand);
- setOperationAction(ISD::CTPOP, XLenVT, Expand);
+ setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP}, XLenVT, Expand);
+
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
}
if (Subtarget.hasStdExtZbt()) {
- setOperationAction(ISD::FSHL, XLenVT, Custom);
- setOperationAction(ISD::FSHR, XLenVT, Custom);
+ setOperationAction({ISD::FSHL, ISD::FSHR}, XLenVT, Custom);
setOperationAction(ISD::SELECT, XLenVT, Legal);
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::FSHL, MVT::i32, Custom);
- setOperationAction(ISD::FSHR, MVT::i32, Custom);
- }
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::FSHL, ISD::FSHR}, MVT::i32, Custom);
} else {
setOperationAction(ISD::SELECT, XLenVT, Custom);
}
+ static constexpr ISD::NodeType FPLegalNodeTypes[] = {
+ ISD::FMINNUM, ISD::FMAXNUM, ISD::LRINT,
+ ISD::LLRINT, ISD::LROUND, ISD::LLROUND,
+ ISD::STRICT_LRINT, ISD::STRICT_LLRINT, ISD::STRICT_LROUND,
+ ISD::STRICT_LLROUND, ISD::STRICT_FMA, ISD::STRICT_FADD,
+ ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV,
+ ISD::STRICT_FSQRT, ISD::STRICT_FSETCC, ISD::STRICT_FSETCCS};
+
static const ISD::CondCode FPCCToExpand[] = {
ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
@@ -331,50 +315,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
if (Subtarget.hasStdExtZfh()) {
- setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
- setOperationAction(ISD::LRINT, MVT::f16, Legal);
- setOperationAction(ISD::LLRINT, MVT::f16, Legal);
- setOperationAction(ISD::LROUND, MVT::f16, Legal);
- setOperationAction(ISD::LLROUND, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_LRINT, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_LLRINT, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_LROUND, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_LLROUND, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal);
+ for (auto NT : FPLegalNodeTypes)
+ setOperationAction(NT, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Legal);
- setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Legal);
- for (auto CC : FPCCToExpand)
- setCondCodeAction(CC, MVT::f16, Expand);
+ setCondCodeAction(FPCCToExpand, MVT::f16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::BR_CC, MVT::f16, Expand);
- setOperationAction(ISD::FREM, MVT::f16, Promote);
- setOperationAction(ISD::FCEIL, MVT::f16, Promote);
- setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
- setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
- setOperationAction(ISD::FRINT, MVT::f16, Promote);
- setOperationAction(ISD::FROUND, MVT::f16, Promote);
- setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
- setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
- setOperationAction(ISD::FPOW, MVT::f16, Promote);
- setOperationAction(ISD::FPOWI, MVT::f16, Promote);
- setOperationAction(ISD::FCOS, MVT::f16, Promote);
- setOperationAction(ISD::FSIN, MVT::f16, Promote);
- setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
- setOperationAction(ISD::FEXP, MVT::f16, Promote);
- setOperationAction(ISD::FEXP2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG, MVT::f16, Promote);
- setOperationAction(ISD::FLOG2, MVT::f16, Promote);
- setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction({ISD::FREM, ISD::FCEIL, ISD::FFLOOR, ISD::FNEARBYINT,
+ ISD::FRINT, ISD::FROUND, ISD::FROUNDEVEN, ISD::FTRUNC,
+ ISD::FPOW, ISD::FPOWI, ISD::FCOS, ISD::FSIN,
+ ISD::FSINCOS, ISD::FEXP, ISD::FEXP2, ISD::FLOG,
+ ISD::FLOG2, ISD::FLOG10},
+ MVT::f16, Promote);
// FIXME: Need to promote f16 STRICT_* to f32 libcalls, but we don't have
// complete support for all operations in LegalizeDAG.
@@ -385,26 +340,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
if (Subtarget.hasStdExtF()) {
- setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
- setOperationAction(ISD::LRINT, MVT::f32, Legal);
- setOperationAction(ISD::LLRINT, MVT::f32, Legal);
- setOperationAction(ISD::LROUND, MVT::f32, Legal);
- setOperationAction(ISD::LLROUND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_LRINT, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_LLRINT, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_LROUND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_LLROUND, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Legal);
- setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Legal);
- for (auto CC : FPCCToExpand)
- setCondCodeAction(CC, MVT::f32, Expand);
+ for (auto NT : FPLegalNodeTypes)
+ setOperationAction(NT, MVT::f32, Legal);
+ setCondCodeAction(FPCCToExpand, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
@@ -418,28 +356,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
if (Subtarget.hasStdExtD()) {
- setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
- setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
- setOperationAction(ISD::LRINT, MVT::f64, Legal);
- setOperationAction(ISD::LLRINT, MVT::f64, Legal);
- setOperationAction(ISD::LROUND, MVT::f64, Legal);
- setOperationAction(ISD::LLROUND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_LRINT, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_LLRINT, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_LROUND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_LLROUND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
+ for (auto NT : FPLegalNodeTypes)
+ setOperationAction(NT, MVT::f64, Legal);
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Legal);
- setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Legal);
- for (auto CC : FPCCToExpand)
- setCondCodeAction(CC, MVT::f64, Expand);
+ setCondCodeAction(FPCCToExpand, MVT::f64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::BR_CC, MVT::f64, Expand);
@@ -451,40 +372,38 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::f64, MVT::f16, Expand);
}
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
- }
+ if (Subtarget.is64Bit())
+ setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT,
+ ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT},
+ MVT::i32, Custom);
if (Subtarget.hasStdExtF()) {
- setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom);
- setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom);
+ setOperationAction({ISD::FP_TO_UINT_SAT, ISD::FP_TO_SINT_SAT}, XLenVT,
+ Custom);
- setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal);
- setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal);
- setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal);
- setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal);
+ setOperationAction({ISD::STRICT_FP_TO_UINT, ISD::STRICT_FP_TO_SINT,
+ ISD::STRICT_UINT_TO_FP, ISD::STRICT_SINT_TO_FP},
+ XLenVT, Legal);
setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
}
- setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
- setOperationAction(ISD::BlockAddress, XLenVT, Custom);
- setOperationAction(ISD::ConstantPool, XLenVT, Custom);
- setOperationAction(ISD::JumpTable, XLenVT, Custom);
+ setOperationAction({ISD::GlobalAddress, ISD::BlockAddress, ISD::ConstantPool,
+ ISD::JumpTable},
+ XLenVT, Custom);
setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);
+ if (Subtarget.is64Bit())
+ setOperationAction(ISD::Constant, MVT::i64, Custom);
+
// TODO: On M-mode only targets, the cycle[h] CSR may not be present.
// Unfortunately this can't be determined just from the ISA naming string.
setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
Subtarget.is64Bit() ? Legal : Custom);
- setOperationAction(ISD::TRAP, MVT::Other, Legal);
- setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
+ setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
if (Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);
@@ -505,19 +424,16 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// RVV intrinsics may have illegal operands.
// We also need to custom legalize vmv.x.s.
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
- if (Subtarget.is64Bit()) {
+ setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
+ {MVT::i8, MVT::i16}, Custom);
+ if (Subtarget.is64Bit())
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
- } else {
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
- }
+ else
+ setOperationAction({ISD::INTRINSIC_WO_CHAIN, ISD::INTRINSIC_W_CHAIN},
+ MVT::i64, Custom);
- setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
- setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
+ setOperationAction({ISD::INTRINSIC_W_CHAIN, ISD::INTRINSIC_VOID},
+ MVT::Other, Custom);
static const unsigned IntegerVPOps[] = {
ISD::VP_ADD, ISD::VP_SUB, ISD::VP_MUL,
@@ -527,191 +443,175 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX,
ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
- ISD::VP_MERGE, ISD::VP_SELECT};
+ ISD::VP_MERGE, ISD::VP_SELECT, ISD::VP_FPTOSI,
+ ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_SIGN_EXTEND,
+ ISD::VP_ZERO_EXTEND, ISD::VP_TRUNCATE};
static const unsigned FloatingPointVPOps[] = {
- ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL,
- ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
- ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
- ISD::VP_SELECT};
+ ISD::VP_FADD, ISD::VP_FSUB,
+ ISD::VP_FMUL, ISD::VP_FDIV,
+ ISD::VP_FNEG, ISD::VP_FMA,
+ ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
+ ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX,
+ ISD::VP_MERGE, ISD::VP_SELECT,
+ ISD::VP_SITOFP, ISD::VP_UITOFP,
+ ISD::VP_SETCC, ISD::VP_FP_ROUND,
+ ISD::VP_FP_EXTEND};
if (!Subtarget.is64Bit()) {
// We must custom-lower certain vXi64 operations on RV32 due to the vector
// element type being illegal.
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);
-
- setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
-
- setOperationAction(ISD::VP_REDUCE_ADD, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_AND, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_OR, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_XOR, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_SMAX, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_SMIN, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_UMAX, MVT::i64, Custom);
- setOperationAction(ISD::VP_REDUCE_UMIN, MVT::i64, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
+ MVT::i64, Custom);
+
+ setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND,
+ ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR,
+ ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN},
+ MVT::i64, Custom);
+
+ setOperationAction({ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND,
+ ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR,
+ ISD::VP_REDUCE_SMAX, ISD::VP_REDUCE_SMIN,
+ ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN},
+ MVT::i64, Custom);
}
for (MVT VT : BoolVecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
// Mask VTs are custom-expanded into a series of standard nodes
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction({ISD::TRUNCATE, ISD::CONCAT_VECTORS,
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+ VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
+ Custom);
setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Expand);
- setOperationAction(ISD::VP_MERGE, VT, Expand);
- setOperationAction(ISD::VP_SELECT, VT, Expand);
+ setOperationAction(
+ {ISD::SELECT_CC, ISD::VSELECT, ISD::VP_MERGE, ISD::VP_SELECT}, VT,
+ Expand);
- setOperationAction(ISD::VP_AND, VT, Custom);
- setOperationAction(ISD::VP_OR, VT, Custom);
- setOperationAction(ISD::VP_XOR, VT, Custom);
+ setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR}, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(
+ {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
+ Custom);
- setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
- setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
- setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+ setOperationAction(
+ {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
+ Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(
+ {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
+ VT, Custom);
// Expand all extending loads to types larger than this, and truncating
// stores from types larger than this.
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(OtherVT, VT, Expand);
- setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
+ VT, Expand);
}
+
+ setOperationAction(
+ {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_TRUNCATE, ISD::VP_SETCC}, VT,
+ Custom);
+ setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
}
for (MVT VT : IntVecVTs) {
- if (VT.getVectorElementType() == MVT::i64 &&
- !Subtarget.hasVInstructionsI64())
+ if (!isTypeLegal(VT))
continue;
setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
// Vectors implement MULHS/MULHU.
- setOperationAction(ISD::SMUL_LOHI, VT, Expand);
- setOperationAction(ISD::UMUL_LOHI, VT, Expand);
+ setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand);
// nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
- if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
- setOperationAction(ISD::MULHU, VT, Expand);
- setOperationAction(ISD::MULHS, VT, Expand);
- }
+ if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV())
+ setOperationAction({ISD::MULHU, ISD::MULHS}, VT, Expand);
- setOperationAction(ISD::SMIN, VT, Legal);
- setOperationAction(ISD::SMAX, VT, Legal);
- setOperationAction(ISD::UMIN, VT, Legal);
- setOperationAction(ISD::UMAX, VT, Legal);
+ setOperationAction({ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}, VT,
+ Legal);
- setOperationAction(ISD::ROTL, VT, Expand);
- setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction({ISD::ROTL, ISD::ROTR}, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
- setOperationAction(ISD::CTLZ, VT, Expand);
- setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction({ISD::CTTZ, ISD::CTLZ, ISD::CTPOP, ISD::BSWAP}, VT,
+ Expand);
- setOperationAction(ISD::BSWAP, VT, Expand);
// Custom-lower extensions and truncations from/to mask types.
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+ setOperationAction({ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND},
+ VT, Custom);
// RVV has native int->float & float->int conversions where the
// element type sizes are within one power-of-two of each other. Any
// wider distances between type sizes have to be lowered as sequences
// which progressively narrow the gap in stages.
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction(
+ {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT},
+ VT, Custom);
- setOperationAction(ISD::SADDSAT, VT, Legal);
- setOperationAction(ISD::UADDSAT, VT, Legal);
- setOperationAction(ISD::SSUBSAT, VT, Legal);
- setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(
+ {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
// nodes which truncate by one power of two at a time.
setOperationAction(ISD::TRUNCATE, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
+ Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
-
- for (unsigned VPOpc : IntegerVPOps)
- setOperationAction(VPOpc, VT, Custom);
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
-
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
-
- setOperationAction(ISD::VP_LOAD, VT, Custom);
- setOperationAction(ISD::VP_STORE, VT, Custom);
- setOperationAction(ISD::VP_GATHER, VT, Custom);
- setOperationAction(ISD::VP_SCATTER, VT, Custom);
-
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_AND,
+ ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR,
+ ISD::VECREDUCE_SMAX, ISD::VECREDUCE_SMIN,
+ ISD::VECREDUCE_UMAX, ISD::VECREDUCE_UMIN},
+ VT, Custom);
+
+ setOperationAction(IntegerVPOps, VT, Custom);
+
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+
+ setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
+ VT, Custom);
+
+ setOperationAction(
+ {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+ Custom);
+
+ setOperationAction(
+ {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+ VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::STEP_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+ setOperationAction({ISD::STEP_VECTOR, ISD::VECTOR_REVERSE}, VT, Custom);
for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
- setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, OtherVT,
+ VT, Expand);
}
+ // Splice
+ setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
+
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
// type that can represent the value exactly.
if (VT.getVectorElementType() != MVT::i64) {
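[Editorial note] The "progressively narrow the gap in stages" comments in this hunk describe conversions whose element sizes are more than one power of two apart. A conceptual illustration (types chosen for exposition):

    // i8 and f64 elements are three powers of two apart, so one native RVV
    // conversion cannot bridge them; a staged lowering via an intermediate
    // integer type might look like:
    //   nxv2i8  --sign_extend-->  nxv2i32  --sint_to_fp-->  nxv2f64
    // Each stage then changes element width by an amount the hardware
    // conversions can handle directly.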
@@ -719,8 +619,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
EVT FloatVT = MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
if (isTypeLegal(FloatVT)) {
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
+ setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Custom);
}
}
}
@@ -745,21 +645,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// sizes are within one power-of-two of each other. Therefore conversions
// between vXf16 and vXf64 must be lowered as sequences which convert via
// vXf32.
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::FP_EXTEND, VT, Custom);
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
// Custom-lower insert/extract operations to simplify patterns.
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT}, VT,
+ Custom);
// Expand various condition codes (explained above).
- for (auto CC : VFPCCToExpand)
- setCondCodeAction(CC, VT, Expand);
-
- setOperationAction(ISD::FMINNUM, VT, Legal);
- setOperationAction(ISD::FMAXNUM, VT, Legal);
-
- setOperationAction(ISD::FTRUNC, VT, Custom);
- setOperationAction(ISD::FCEIL, VT, Custom);
- setOperationAction(ISD::FFLOOR, VT, Custom);
+ setCondCodeAction(VFPCCToExpand, VT, Expand);
+
+ setOperationAction({ISD::FMINNUM, ISD::FMAXNUM}, VT, Legal);
+
+ setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
+ VT, Custom);
+
+ setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
+ VT, Custom);
+
+ // Expand FP operations that need libcalls.
+ setOperationAction(ISD::FREM, VT, Expand);
+ setOperationAction(ISD::FPOW, VT, Expand);
+ setOperationAction(ISD::FCOS, VT, Expand);
+ setOperationAction(ISD::FSIN, VT, Expand);
+ setOperationAction(ISD::FSINCOS, VT, Expand);
+ setOperationAction(ISD::FEXP, VT, Expand);
+ setOperationAction(ISD::FEXP2, VT, Expand);
+ setOperationAction(ISD::FLOG, VT, Expand);
+ setOperationAction(ISD::FLOG2, VT, Expand);
+ setOperationAction(ISD::FLOG10, VT, Expand);
+ setOperationAction(ISD::FRINT, VT, Expand);
+ setOperationAction(ISD::FNEARBYINT, VT, Expand);
- setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
@@ -768,30 +682,25 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FCOPYSIGN, VT, Legal);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
+ setOperationAction({ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER},
+ VT, Custom);
- setOperationAction(ISD::VP_LOAD, VT, Custom);
- setOperationAction(ISD::VP_STORE, VT, Custom);
- setOperationAction(ISD::VP_GATHER, VT, Custom);
- setOperationAction(ISD::VP_SCATTER, VT, Custom);
+ setOperationAction(
+ {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+ Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction(
+ {ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR},
+ VT, Custom);
- setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
+ setOperationAction({ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE}, VT, Custom);
- for (unsigned VPOpc : FloatingPointVPOps)
- setOperationAction(VPOpc, VT, Custom);
+ setOperationAction(FloatingPointVPOps, VT, Custom);
};
// Sets common extload/truncstore actions on RVV floating-point vector
@@ -804,21 +713,31 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
- if (Subtarget.hasVInstructionsF16())
- for (MVT VT : F16VecVTs)
+ if (Subtarget.hasVInstructionsF16()) {
+ for (MVT VT : F16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
+ }
+ }
- for (MVT VT : F32VecVTs) {
- if (Subtarget.hasVInstructionsF32())
+ if (Subtarget.hasVInstructionsF32()) {
+ for (MVT VT : F32VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
- SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ }
}
- for (MVT VT : F64VecVTs) {
- if (Subtarget.hasVInstructionsF64())
+ if (Subtarget.hasVInstructionsF64()) {
+ for (MVT VT : F64VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
SetCommonVFPActions(VT);
- SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
- SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+ SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
+ }
}
if (Subtarget.useRVVForFixedLengthVectors()) {
@@ -831,23 +750,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(Op, VT, Expand);
for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
setTruncStoreAction(VT, OtherVT, Expand);
- setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
- setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
+ setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD},
+ OtherVT, VT, Expand);
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
+ Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
+ setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS}, VT,
+ Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
+ VT, Custom);
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
@@ -857,100 +774,80 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::BITCAST, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
+ setOperationAction(
+ {ISD::VECREDUCE_AND, ISD::VECREDUCE_OR, ISD::VECREDUCE_XOR}, VT,
+ Custom);
- setOperationAction(ISD::VP_REDUCE_AND, VT, Custom);
- setOperationAction(ISD::VP_REDUCE_OR, VT, Custom);
- setOperationAction(ISD::VP_REDUCE_XOR, VT, Custom);
+ setOperationAction(
+ {ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR}, VT,
+ Custom);
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
+ setOperationAction({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT,
+ ISD::FP_TO_UINT},
+ VT, Custom);
// Operations below differ between mask vectors and other vectors.
if (VT.getVectorElementType() == MVT::i1) {
- setOperationAction(ISD::VP_AND, VT, Custom);
- setOperationAction(ISD::VP_OR, VT, Custom);
- setOperationAction(ISD::VP_XOR, VT, Custom);
- setOperationAction(ISD::AND, VT, Custom);
- setOperationAction(ISD::OR, VT, Custom);
- setOperationAction(ISD::XOR, VT, Custom);
+ setOperationAction({ISD::VP_AND, ISD::VP_OR, ISD::VP_XOR, ISD::AND,
+ ISD::OR, ISD::XOR},
+ VT, Custom);
+
+ setOperationAction(
+ {ISD::VP_FPTOSI, ISD::VP_FPTOUI, ISD::VP_SETCC, ISD::VP_TRUNCATE},
+ VT, Custom);
continue;
}
- // Use SPLAT_VECTOR to prevent type legalization from destroying the
- // splats when type legalizing i64 scalar on RV32.
+ // Make SPLAT_VECTOR Legal so DAGCombine will convert splat vectors to
+ // it before type legalization for i64 vectors on RV32. It will then be
+ // type legalized to SPLAT_VECTOR_PARTS which we need to Custom handle.
// FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
// improvements first.
if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
}
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
-
- setOperationAction(ISD::VP_LOAD, VT, Custom);
- setOperationAction(ISD::VP_STORE, VT, Custom);
- setOperationAction(ISD::VP_GATHER, VT, Custom);
- setOperationAction(ISD::VP_SCATTER, VT, Custom);
-
- setOperationAction(ISD::ADD, VT, Custom);
- setOperationAction(ISD::MUL, VT, Custom);
- setOperationAction(ISD::SUB, VT, Custom);
- setOperationAction(ISD::AND, VT, Custom);
- setOperationAction(ISD::OR, VT, Custom);
- setOperationAction(ISD::XOR, VT, Custom);
- setOperationAction(ISD::SDIV, VT, Custom);
- setOperationAction(ISD::SREM, VT, Custom);
- setOperationAction(ISD::UDIV, VT, Custom);
- setOperationAction(ISD::UREM, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
-
- setOperationAction(ISD::SMIN, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Custom);
- setOperationAction(ISD::UMIN, VT, Custom);
- setOperationAction(ISD::UMAX, VT, Custom);
- setOperationAction(ISD::ABS, VT, Custom);
+ setOperationAction(
+ {ISD::MLOAD, ISD::MSTORE, ISD::MGATHER, ISD::MSCATTER}, VT, Custom);
+
+ setOperationAction(
+ {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+ Custom);
+
+ setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
+ ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
+ ISD::UREM, ISD::SHL, ISD::SRA, ISD::SRL},
+ VT, Custom);
+
+ setOperationAction(
+ {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX, ISD::ABS}, VT, Custom);
// vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
- if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
- }
+ if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
+ setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
- setOperationAction(ISD::SADDSAT, VT, Custom);
- setOperationAction(ISD::UADDSAT, VT, Custom);
- setOperationAction(ISD::SSUBSAT, VT, Custom);
- setOperationAction(ISD::USUBSAT, VT, Custom);
+ setOperationAction(
+ {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT,
+ Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+ setOperationAction(
+ {ISD::ANY_EXTEND, ISD::SIGN_EXTEND, ISD::ZERO_EXTEND}, VT, Custom);
// Custom-lower reduction operations to set up the corresponding custom
// nodes' operands.
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
+ setOperationAction({ISD::VECREDUCE_ADD, ISD::VECREDUCE_SMAX,
+ ISD::VECREDUCE_SMIN, ISD::VECREDUCE_UMAX,
+ ISD::VECREDUCE_UMIN},
+ VT, Custom);
- for (unsigned VPOpc : IntegerVPOps)
- setOperationAction(VPOpc, VT, Custom);
+ setOperationAction(IntegerVPOps, VT, Custom);
// Lower CTLZ_ZERO_UNDEF and CTTZ_ZERO_UNDEF if we have a floating point
// type that can represent the value exactly.
@@ -959,10 +856,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
VT.getVectorElementType() == MVT::i32 ? MVT::f64 : MVT::f32;
EVT FloatVT =
MVT::getVectorVT(FloatEltVT, VT.getVectorElementCount());
- if (isTypeLegal(FloatVT)) {
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
- }
+ if (isTypeLegal(FloatVT))
+ setOperationAction({ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
+ Custom);
}
}
@@ -979,69 +875,50 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+ setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
+ Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
-
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MGATHER, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, Custom);
-
- setOperationAction(ISD::VP_LOAD, VT, Custom);
- setOperationAction(ISD::VP_STORE, VT, Custom);
- setOperationAction(ISD::VP_GATHER, VT, Custom);
- setOperationAction(ISD::VP_SCATTER, VT, Custom);
-
- setOperationAction(ISD::FADD, VT, Custom);
- setOperationAction(ISD::FSUB, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Custom);
- setOperationAction(ISD::FDIV, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FSQRT, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
- setOperationAction(ISD::FMINNUM, VT, Custom);
- setOperationAction(ISD::FMAXNUM, VT, Custom);
-
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::FP_EXTEND, VT, Custom);
-
- setOperationAction(ISD::FTRUNC, VT, Custom);
- setOperationAction(ISD::FCEIL, VT, Custom);
- setOperationAction(ISD::FFLOOR, VT, Custom);
+ setOperationAction({ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS,
+ ISD::VECTOR_SHUFFLE, ISD::INSERT_VECTOR_ELT,
+ ISD::EXTRACT_VECTOR_ELT},
+ VT, Custom);
+
+ setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+ ISD::MGATHER, ISD::MSCATTER},
+ VT, Custom);
+
+ setOperationAction(
+ {ISD::VP_LOAD, ISD::VP_STORE, ISD::VP_GATHER, ISD::VP_SCATTER}, VT,
+ Custom);
+
+ setOperationAction({ISD::FADD, ISD::FSUB, ISD::FMUL, ISD::FDIV,
+ ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN, ISD::FSQRT,
+ ISD::FMA, ISD::FMINNUM, ISD::FMAXNUM},
+ VT, Custom);
+
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+
+ setOperationAction({ISD::FTRUNC, ISD::FCEIL, ISD::FFLOOR, ISD::FROUND},
+ VT, Custom);
for (auto CC : VFPCCToExpand)
setCondCodeAction(CC, VT, Expand);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction({ISD::VSELECT, ISD::SELECT}, VT, Custom);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::BITCAST, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
+ setOperationAction({ISD::VECREDUCE_FADD, ISD::VECREDUCE_SEQ_FADD,
+ ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMAX},
+ VT, Custom);
- for (unsigned VPOpc : FloatingPointVPOps)
- setOperationAction(VPOpc, VT, Custom);
+ setOperationAction(FloatingPointVPOps, VT, Custom);
}
// Custom-legalize bitcasts from fixed-length vectors to scalar types.
- setOperationAction(ISD::BITCAST, MVT::i8, Custom);
- setOperationAction(ISD::BITCAST, MVT::i16, Custom);
- setOperationAction(ISD::BITCAST, MVT::i32, Custom);
- setOperationAction(ISD::BITCAST, MVT::i64, Custom);
+ setOperationAction(ISD::BITCAST, {MVT::i8, MVT::i16, MVT::i32, MVT::i64},
+ Custom);
if (Subtarget.hasStdExtZfh())
setOperationAction(ISD::BITCAST, MVT::f16, Custom);
if (Subtarget.hasStdExtF())
@@ -1061,30 +938,33 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// Jumps are expensive, compared to logic
setJumpIsExpensive();
- setTargetDAGCombine(ISD::ADD);
- setTargetDAGCombine(ISD::SUB);
- setTargetDAGCombine(ISD::AND);
- setTargetDAGCombine(ISD::OR);
- setTargetDAGCombine(ISD::XOR);
- setTargetDAGCombine(ISD::ANY_EXTEND);
- if (Subtarget.hasStdExtF()) {
- setTargetDAGCombine(ISD::ZERO_EXTEND);
- setTargetDAGCombine(ISD::FP_TO_SINT);
- setTargetDAGCombine(ISD::FP_TO_UINT);
- setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
- setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
- }
- if (Subtarget.hasVInstructions()) {
- setTargetDAGCombine(ISD::FCOPYSIGN);
- setTargetDAGCombine(ISD::MGATHER);
- setTargetDAGCombine(ISD::MSCATTER);
- setTargetDAGCombine(ISD::VP_GATHER);
- setTargetDAGCombine(ISD::VP_SCATTER);
+ setTargetDAGCombine({ISD::INTRINSIC_WO_CHAIN, ISD::ADD, ISD::SUB, ISD::AND,
+ ISD::OR, ISD::XOR});
+ if (Subtarget.is64Bit())
setTargetDAGCombine(ISD::SRA);
- setTargetDAGCombine(ISD::SRL);
- setTargetDAGCombine(ISD::SHL);
- setTargetDAGCombine(ISD::STORE);
- }
+
+ if (Subtarget.hasStdExtF())
+ setTargetDAGCombine({ISD::FADD, ISD::FMAXNUM, ISD::FMINNUM});
+
+ if (Subtarget.hasStdExtZbp())
+ setTargetDAGCombine({ISD::ROTL, ISD::ROTR});
+
+ if (Subtarget.hasStdExtZbb())
+ setTargetDAGCombine({ISD::UMAX, ISD::UMIN, ISD::SMAX, ISD::SMIN});
+
+ if (Subtarget.hasStdExtZbkb())
+ setTargetDAGCombine(ISD::BITREVERSE);
+ if (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZbb())
+ setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+ if (Subtarget.hasStdExtF())
+ setTargetDAGCombine({ISD::ZERO_EXTEND, ISD::FP_TO_SINT, ISD::FP_TO_UINT,
+ ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT});
+ if (Subtarget.hasVInstructions())
+ setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
+ ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
+ ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR});
+ if (Subtarget.useRVVForFixedLengthVectors())
+ setTargetDAGCombine(ISD::BITCAST);
setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
@@ -1149,6 +1029,24 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.size = MemoryLocation::UnknownSize;
Info.flags |= MachineMemOperand::MOStore;
return true;
+ case Intrinsic::riscv_seg2_load:
+ case Intrinsic::riscv_seg3_load:
+ case Intrinsic::riscv_seg4_load:
+ case Intrinsic::riscv_seg5_load:
+ case Intrinsic::riscv_seg6_load:
+ case Intrinsic::riscv_seg7_load:
+ case Intrinsic::riscv_seg8_load:
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.ptrVal = I.getArgOperand(0);
+ Info.memVT =
+ getValueType(DL, I.getType()->getStructElementType(0)->getScalarType());
+ Info.align =
+ Align(DL.getTypeSizeInBits(
+ I.getType()->getStructElementType(0)->getScalarType()) /
+ 8);
+ Info.size = MemoryLocation::UnknownSize;
+ Info.flags |= MachineMemOperand::MOLoad;
+ return true;
}
}
@@ -1160,6 +1058,10 @@ bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
if (AM.BaseGV)
return false;
+ // RVV instructions only support register addressing.
+ if (Subtarget.hasVInstructions() && isa<VectorType>(Ty))
+ return AM.HasBaseReg && AM.Scale == 0 && !AM.BaseOffs;
+
// Require a 12-bit signed offset.
if (!isInt<12>(AM.BaseOffs))
return false;
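[Editorial note] The new RVV check above encodes that vector memory instructions take only a bare base register. An illustration in pseudo-assembly comments (the mnemonics are real RISC-V, the framing is editorial):

    // Scalar access: a 12-bit offset folds into the load itself.
    //   lw      a0, 8(a1)
    // Vector access: no immediate-offset form exists, so the offset needs
    // its own add before the access.
    //   addi    a2, a1, 8
    //   vle32.v v8, (a2)
    // Hence HasBaseReg with Scale == 0 and BaseOffs == 0 for vector types.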
@@ -1225,6 +1127,10 @@ bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}
+bool RISCVTargetLowering::signExtendConstant(const ConstantInt *CI) const {
+ return Subtarget.is64Bit() && CI->getType()->isIntegerTy(32);
+}
+
bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
return Subtarget.hasStdExtZbb();
}
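[Editorial note] signExtendConstant extends the isSExtCheaperThanZExt rationale to constant materialization. On RV64 the asymmetry looks like this (standard mnemonics; the framing is editorial):

    //   sext.w a0, a0      // sign-extend i32 -> i64: one instruction
    //   slli   a0, a0, 32  // zero-extend without Zba: shift left...
    //   srli   a0, a0, 32  // ...then logical shift right
    // So for i32 constants on RV64 it is profitable to prefer the
    // sign-extended 64-bit image of the constant.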
@@ -1245,6 +1151,36 @@ bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
!isa<ConstantSDNode>(Y);
}
+bool RISCVTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
+ // We can use ANDI+SEQZ/SNEZ as a bit test. Y contains the bit position.
+ auto *C = dyn_cast<ConstantSDNode>(Y);
+ return C && C->getAPIntValue().ule(10);
+}
+
+bool RISCVTargetLowering::
+ shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
+ unsigned OldShiftOpcode, unsigned NewShiftOpcode,
+ SelectionDAG &DAG) const {
+ // One interesting pattern that we'd want to form is 'bit extract':
+ // ((1 >> Y) & 1) ==/!= 0
+ // But we also need to be careful not to try to reverse that fold.
+
+ // Is this '((1 >> Y) & 1)'?
+ if (XC && OldShiftOpcode == ISD::SRL && XC->isOne())
+ return false; // Keep the 'bit extract' pattern.
+
+ // Will this be '((1 >> Y) & 1)' after the transform?
+ if (NewShiftOpcode == ISD::SRL && CC->isOne())
+ return true; // Do form the 'bit extract' pattern.
+
+ // If 'X' is a constant, and we transform, then we will immediately
+ // try to undo the fold, thus causing an endless combine loop.
+ // So only do the transform if X is not a constant. This matches the default
+ // implementation of this function.
+ return !XC;
+}
+
/// Check if sinking \p I's operands to I's basic block is profitable, because
/// the operands can be folded into a target instruction, e.g.
/// splats of scalars can fold into vector instructions.
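[Editorial note] The ule(10) cutoff in hasBitTest above follows from ANDI's signed 12-bit immediate: 1 << 10 == 1024 still fits in [-2048, 2047], while 1 << 11 == 2048 does not. A sketch of the selected pattern:

    // Testing bit 5 of a0 with ANDI + SNEZ:
    //   andi a0, a0, 32    // 32 == 1 << 5, fits the immediate field
    //   snez a0, a0        // a0 = (x >> 5) & 1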
@@ -1282,6 +1218,7 @@ bool RISCVTargetLowering::shouldSinkOperands(
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::fma:
+ case Intrinsic::vp_fma:
return Operand == 0 || Operand == 1;
// FIXME: Our patterns can only match vx/vf instructions when the splat
// is on the RHS, because TableGen doesn't recognize our VP operations
@@ -1345,6 +1282,15 @@ bool RISCVTargetLowering::shouldSinkOperands(
return true;
}
+bool RISCVTargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode *GA) const {
+ // In order to maximise the opportunity for common subexpression elimination,
+ // keep a separate ADD node for the global address offset instead of folding
+ // it in the global address node. Later peephole optimisations may choose to
+ // fold it back in when profitable.
+ return false;
+}
+
bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const {
// FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
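[Editorial note] An illustration of the CSE effect that isOffsetFoldingLegal's comment describes (pseudo-assembly in comments, editorial):

    // With folding disabled, accesses to g+8 and g+12 share one address:
    //   lui   a0, %hi(g)
    //   addi  a0, a0, %lo(g)   // address of g, materialized once
    //   lw    a1, 8(a0)
    //   lw    a2, 12(a0)
    // Folding would instead create distinct GlobalAddress(g, 8) and
    // GlobalAddress(g, 12) nodes, each wanting its own lui/addi pair.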
@@ -1583,7 +1529,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
if (VT.getFixedSizeInBits() > 1024 * 8)
return false;
- unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
+ unsigned MinVLen = Subtarget.getRealMinVLen();
MVT EltVT = VT.getVectorElementType();
@@ -1621,7 +1567,7 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
}
// Reject elements larger than ELEN.
- if (EltVT.getSizeInBits() > Subtarget.getMaxELENForFixedLengthVectors())
+ if (EltVT.getSizeInBits() > Subtarget.getELEN())
return false;
unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
@@ -1649,8 +1595,8 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
"Expected legal fixed length vector!");
- unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();
- unsigned MaxELen = Subtarget.getMaxELENForFixedLengthVectors();
+ unsigned MinVLen = Subtarget.getRealMinVLen();
+ unsigned MaxELen = Subtarget.getELEN();
MVT EltVT = VT.getVectorElementType();
switch (EltVT.SimpleTy) {
@@ -1710,6 +1656,23 @@ static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
}
+/// Return the mask type suitable for masking the provided vector type. This
+/// is simply an i1 element type vector of the same (possibly scalable)
+/// length.
+static MVT getMaskTypeFor(EVT VecVT) {
+ assert(VecVT.isVector());
+ ElementCount EC = VecVT.getVectorElementCount();
+ return MVT::getVectorVT(MVT::i1, EC);
+}
+
+/// Creates an all ones mask suitable for masking a vector of type VecTy with
+/// vector length VL.
+static SDValue getAllOnesMask(MVT VecVT, SDValue VL, SDLoc DL,
+ SelectionDAG &DAG) {
+ MVT MaskVT = getMaskTypeFor(VecVT);
+ return DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+}
+
// Gets the two common "VL" operands: an all-ones mask and the vector length.
// VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
// the vector type that it is contained in.
@@ -1720,9 +1683,8 @@ getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
MVT XLenVT = Subtarget.getXLenVT();
SDValue VL = VecVT.isFixedLengthVector()
? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
- : DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ : DAG.getRegister(RISCV::X0, XLenVT);
+ SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
return {Mask, VL};
}
@@ -1747,14 +1709,6 @@ bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
return false;
}
-bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
- // Only splats are currently supported.
- if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
- return true;
-
- return false;
-}
-
static SDValue lowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
// RISCV FP-to-int conversions saturate to the destination register size, but
@@ -1796,7 +1750,7 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
// Freeze the source since we are increasing the number of uses.
- SDValue Src = DAG.getNode(ISD::FREEZE, DL, VT, Op.getOperand(0));
+ SDValue Src = DAG.getFreeze(Op.getOperand(0));
// Truncate to integer and convert back to FP.
MVT IntVT = VT.changeVectorElementTypeToInteger();
@@ -1844,21 +1798,56 @@ static SDValue lowerFTRUNC_FCEIL_FFLOOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
}
-static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+// ISD::FROUND is defined to round to nearest with ties rounding away from 0.
+// This mode isn't supported in vector hardware on RISCV. But as long as we
+// aren't compiling with trapping math, we can emulate this with
+// floor(X + copysign(nextafter(0.5, 0.0), X)).
+// FIXME: Could be shorter by changing rounding mode, but we don't have FRM
+// dependencies modeled yet.
+// FIXME: Use masked operations to avoid final merge.
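+// Worked example (a sketch, assuming IEEE binary32 and round-to-nearest):
+// nextafter(0.5, 0.0) is the predecessor of 0.5, roughly 0.49999997.
+//   round(2.5):        the sum 2.5 + 0.49999997 rounds up to 3.0; floor = 3.0.
+//   round(0.49999997): 0.49999997 + 0.49999997 = 0.99999994; floor = 0.0.
+// A naive floor(X + 0.5) would incorrectly return 1.0 for the second input.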
+static SDValue lowerFROUND(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
- assert(VT.isFixedLengthVector() && "Unexpected vector!");
-
- MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ assert(VT.isVector() && "Unexpected type");
SDLoc DL(Op);
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- unsigned Opc =
- VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
- SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
- return convertFromScalableVector(VT, Splat, DAG, Subtarget);
+ // Freeze the source since we are increasing the number of uses.
+ SDValue Src = DAG.getFreeze(Op.getOperand(0));
+
+ // We do the conversion on the absolute value and fix the sign at the end.
+ SDValue Abs = DAG.getNode(ISD::FABS, DL, VT, Src);
+
+ const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
+ bool Ignored;
+ APFloat Point5Pred = APFloat(0.5f);
+ Point5Pred.convert(FltSem, APFloat::rmNearestTiesToEven, &Ignored);
+ Point5Pred.next(/*nextDown*/ true);
+
+ // Add the adjustment.
+ SDValue Adjust = DAG.getNode(ISD::FADD, DL, VT, Abs,
+ DAG.getConstantFP(Point5Pred, DL, VT));
+
+ // Truncate to integer and convert back to fp.
+ MVT IntVT = VT.changeVectorElementTypeToInteger();
+ SDValue Truncated = DAG.getNode(ISD::FP_TO_SINT, DL, IntVT, Adjust);
+ Truncated = DAG.getNode(ISD::SINT_TO_FP, DL, VT, Truncated);
+
+ // Restore the original sign.
+ Truncated = DAG.getNode(ISD::FCOPYSIGN, DL, VT, Truncated, Src);
+
+ // Determine the largest integer that can be represented exactly. This and
+ // values larger than it don't have any fractional bits so don't need to
+ // be converted.
+ unsigned Precision = APFloat::semanticsPrecision(FltSem);
+ APFloat MaxVal = APFloat(FltSem);
+ MaxVal.convertFromAPInt(APInt::getOneBitSet(Precision, Precision - 1),
+ /*IsSigned*/ false, APFloat::rmNearestTiesToEven);
+ SDValue MaxValNode = DAG.getConstantFP(MaxVal, DL, VT);
+
+ // If abs(Src) was larger than MaxVal or nan, keep it.
+ MVT SetccVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
+ SDValue Setcc = DAG.getSetCC(DL, SetccVT, Abs, MaxValNode, ISD::SETOLT);
+ return DAG.getSelect(DL, VT, Setcc, Truncated, Src);
}
struct VIDSequence {
@@ -1908,37 +1897,27 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
      // A zero value difference means that we're somewhere in the middle
// of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a
// step change before evaluating the sequence.
- if (ValDiff != 0) {
- int64_t Remainder = ValDiff % IdxDiff;
- // Normalize the step if it's greater than 1.
- if (Remainder != ValDiff) {
- // The difference must cleanly divide the element span.
- if (Remainder != 0)
- return None;
- ValDiff /= IdxDiff;
- IdxDiff = 1;
- }
-
- if (!SeqStepNum)
- SeqStepNum = ValDiff;
- else if (ValDiff != SeqStepNum)
- return None;
+ if (ValDiff == 0)
+ continue;
- if (!SeqStepDenom)
- SeqStepDenom = IdxDiff;
- else if (IdxDiff != *SeqStepDenom)
+ int64_t Remainder = ValDiff % IdxDiff;
+ // Normalize the step if it's greater than 1.
+ if (Remainder != ValDiff) {
+ // The difference must cleanly divide the element span.
+ if (Remainder != 0)
return None;
+ ValDiff /= IdxDiff;
+ IdxDiff = 1;
}
- }
- // Record and/or check any addend.
- if (SeqStepNum && SeqStepDenom) {
- uint64_t ExpectedVal =
- (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
- int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
- if (!SeqAddend)
- SeqAddend = Addend;
- else if (SeqAddend != Addend)
+ if (!SeqStepNum)
+ SeqStepNum = ValDiff;
+ else if (ValDiff != SeqStepNum)
+ return None;
+
+ if (!SeqStepDenom)
+ SeqStepDenom = IdxDiff;
+ else if (IdxDiff != *SeqStepDenom)
return None;
}
@@ -1946,14 +1925,68 @@ static Optional<VIDSequence> isSimpleVIDSequence(SDValue Op) {
if (!PrevElt || PrevElt->first != Val)
PrevElt = std::make_pair(Val, Idx);
}
- // We need to have logged both a step and an addend for this to count as
- // a legal index sequence.
- if (!SeqStepNum || !SeqStepDenom || !SeqAddend)
+
+ // We need to have logged a step for this to count as a legal index sequence.
+ if (!SeqStepNum || !SeqStepDenom)
return None;
+ // Loop back through the sequence and validate elements we might have skipped
+ // while waiting for a valid step. While doing this, log any sequence addend.
+ for (unsigned Idx = 0; Idx < NumElts; Idx++) {
+ if (Op.getOperand(Idx).isUndef())
+ continue;
+ uint64_t Val = Op.getConstantOperandVal(Idx) &
+ maskTrailingOnes<uint64_t>(EltSizeInBits);
+ uint64_t ExpectedVal =
+ (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom;
+ int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits);
+ if (!SeqAddend)
+ SeqAddend = Addend;
+ else if (Addend != SeqAddend)
+ return None;
+ }
+
+ assert(SeqAddend && "Must have an addend if we have a step");
+
return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend};
}
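// Illustrative examples (editorial): with each element defined as
//   Val(Idx) = (Idx * StepNumerator) / StepDenominator + Addend
// the build_vector <0, 2, 4, 6> matches as {2, 1, 0}, and <1, 1, 2, 2>
// matches as {1, 2, 1}, since Idx/2 + 1 reproduces every element.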
+// Match a splatted value (SPLAT_VECTOR/BUILD_VECTOR) of an EXTRACT_VECTOR_ELT
+// and lower it as a VRGATHER_VX_VL from the source vector.
+static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (SplatVal.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = SplatVal.getOperand(0);
+ // Only perform this optimization on vectors of the same size for simplicity.
+ // Don't perform this optimization for i1 vectors.
+ // FIXME: Support i1 vectors, maybe by promoting to i8?
+ if (Vec.getValueType() != VT || VT.getVectorElementType() == MVT::i1)
+ return SDValue();
+ SDValue Idx = SplatVal.getOperand(1);
+ // The index must be a legal type.
+ if (Idx.getValueType() != Subtarget.getXLenVT())
+ return SDValue();
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+
+ SDValue Mask, VL;
+ std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, Vec,
+ Idx, Mask, DAG.getUNDEF(ContainerVT), VL);
+
+ if (!VT.isFixedLengthVector())
+ return Gather;
+
+ return convertFromScalableVector(VT, Gather, DAG, Subtarget);
+}
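+// For instance (a hedged sketch), IR such as
+//   %elt   = extractelement <4 x i32> %vec, i64 %idx
+//   %head  = insertelement <4 x i32> poison, i32 %elt, i32 0
+//   %splat = shufflevector <4 x i32> %head, <4 x i32> poison,
+//                          <4 x i32> zeroinitializer
+// can be lowered to a single vrgather.vx of %vec with scalar index %idx,
+// rather than an element extract followed by a scalar splat.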
+
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
@@ -1989,8 +2022,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// codegen across RV32 and RV64.
unsigned NumViaIntegerBits =
std::min(std::max(NumElts, 8u), Subtarget.getXLen());
- NumViaIntegerBits = std::min(NumViaIntegerBits,
- Subtarget.getMaxELENForFixedLengthVectors());
+ NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELEN());
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
// If we have to use more than one INSERT_VECTOR_ELT then this
      // optimization is likely to increase code size; avoid performing it in
@@ -2012,7 +2044,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// our vector and clear our accumulated data.
if (I != 0 && I % NumViaIntegerBits == 0) {
if (NumViaIntegerBits <= 32)
- Bits = SignExtend64(Bits, 32);
+ Bits = SignExtend64<32>(Bits);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
@@ -2028,7 +2060,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// Insert the (remaining) scalar value into position in our integer
// vector type.
if (NumViaIntegerBits <= 32)
- Bits = SignExtend64(Bits, 32);
+ Bits = SignExtend64<32>(Bits);
SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
DAG.getConstant(IntegerEltIdx, DL, XLenVT));
@@ -2077,9 +2109,12 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+ if (auto Gather = matchSplatAsGather(Splat, VT, DL, DAG, Subtarget))
+ return Gather;
unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
: RISCVISD::VMV_V_X_VL;
- Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
+ Splat =
+ DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Splat, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
@@ -2109,7 +2144,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// a single addi instruction.
if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) ||
(StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) &&
- isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) {
+ isPowerOf2_32(StepDenominator) &&
+ (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) {
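+      // (Editorial note: the SplatStepVal >= 0 || StepDenominator == 1 guard
+      // is needed because the division by StepDenominator below is emitted as
+      // a logical SRL, which only agrees with the signed division used when
+      // matching the sequence if the intermediate values are non-negative.)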
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
// Convert right out of the scalable type so we can use standard ISD
// nodes for the rest of the computation. If we used scalable types with
@@ -2118,18 +2154,18 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
VID = convertFromScalableVector(VT, VID, DAG, Subtarget);
if ((StepOpcode == ISD::MUL && SplatStepVal != 1) ||
(StepOpcode == ISD::SHL && SplatStepVal != 0)) {
- SDValue SplatStep = DAG.getSplatVector(
+ SDValue SplatStep = DAG.getSplatBuildVector(
VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT));
VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep);
}
if (StepDenominator != 1) {
- SDValue SplatStep = DAG.getSplatVector(
+ SDValue SplatStep = DAG.getSplatBuildVector(
VT, DL, DAG.getConstant(Log2_64(StepDenominator), DL, XLenVT));
VID = DAG.getNode(ISD::SRL, DL, VT, VID, SplatStep);
}
if (Addend != 0 || Negate) {
- SDValue SplatAddend =
- DAG.getSplatVector(VT, DL, DAG.getConstant(Addend, DL, XLenVT));
+ SDValue SplatAddend = DAG.getSplatBuildVector(
+ VT, DL, DAG.getConstant(Addend, DL, XLenVT));
VID = DAG.getNode(Negate ? ISD::SUB : ISD::ADD, DL, VT, SplatAddend, VID);
}
return VID;
@@ -2172,7 +2208,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// On RV64, sign-extend from 32 to 64 bits where possible in order to
    // achieve better constant materialization.
if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
- SplatValue = SignExtend64(SplatValue, 32);
+ SplatValue = SignExtend64<32>(SplatValue);
// Since we can't introduce illegal i64 types at this stage, we can only
// perform an i64 splat on RV32 if it is its own sign-extended value. That
@@ -2187,6 +2223,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
SDValue Splat =
DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
+ DAG.getUNDEF(ViaContainerVT),
DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
return DAG.getBitcast(VT, Splat);
@@ -2274,57 +2311,66 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
-static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
- SDValue Hi, SDValue VL, SelectionDAG &DAG) {
+static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
+ SDValue Lo, SDValue Hi, SDValue VL,
+ SelectionDAG &DAG) {
+ if (!Passthru)
+ Passthru = DAG.getUNDEF(VT);
if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
// If Hi constant is all the same sign bit as Lo, lower this as a custom
// node in order to try and match RVV vector/scalar instructions.
if ((LoC >> 31) == HiC)
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Lo, VL);
- // If vl is equal to VLMax and Hi constant is equal to Lo, we could use
+    // If vl is all ones (the VLMAX sentinel) and Hi is equal to Lo, we could use
// vmv.v.x whose EEW = 32 to lower it.
auto *Const = dyn_cast<ConstantSDNode>(VL);
- if (LoC == HiC && Const && Const->getSExtValue() == RISCV::VLMaxSentinel) {
+ if (LoC == HiC && Const && Const->isAllOnesValue()) {
MVT InterVT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
      // TODO: if vl <= min(VLMAX), we can also do this, but we don't have
      // access to the subtarget here.
- auto InterVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterVT, Lo, VL);
+ auto InterVec = DAG.getNode(
+ RISCVISD::VMV_V_X_VL, DL, InterVT, DAG.getUNDEF(InterVT), Lo,
+ DAG.getRegister(RISCV::X0, MVT::i32));
return DAG.getNode(ISD::BITCAST, DL, VT, InterVec);
}
}
// Fall back to a stack store and stride x0 vector load.
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Lo, Hi, VL);
+ return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VT, Passthru, Lo,
+ Hi, VL);
}
// Called by type legalization to handle splat of i64 on RV32.
// FIXME: We can optimize this when the type has sign or zero bits in one
// of the halves.
-static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
- SDValue VL, SelectionDAG &DAG) {
+static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Passthru,
+ SDValue Scalar, SDValue VL,
+ SelectionDAG &DAG) {
assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
DAG.getConstant(0, DL, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
DAG.getConstant(1, DL, MVT::i32));
- return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
+ return splatPartsI64WithVL(DL, VT, Passthru, Lo, Hi, VL, DAG);
}
// This function lowers a splat of a scalar operand Splat with the vector
// length VL. It ensures the final sequence is type legal, which is useful when
// lowering a splat after type legalization.
-static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
- SelectionDAG &DAG,
+static SDValue lowerScalarSplat(SDValue Passthru, SDValue Scalar, SDValue VL,
+ MVT VT, SDLoc DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+ bool HasPassthru = Passthru && !Passthru.isUndef();
+ if (!HasPassthru && !Passthru)
+ Passthru = DAG.getUNDEF(VT);
if (VT.isFloatingPoint()) {
// If VL is 1, we could use vfmv.s.f.
if (isOneConstant(VL))
- return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, DAG.getUNDEF(VT),
- Scalar, VL);
- return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
+ return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT, Passthru, Scalar, VL);
+ return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Passthru, Scalar, VL);
}
MVT XLenVT = Subtarget.getXLenVT();
@@ -2343,55 +2389,25 @@ static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
// use vmv.s.x.
if (isOneConstant(VL) &&
(!Const || isNullConstant(Scalar) || !isInt<5>(Const->getSExtValue())))
- return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT), Scalar,
- VL);
- return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru, Scalar, VL);
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
}
assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
"Unexpected scalar for splat lowering!");
if (isOneConstant(VL) && isNullConstant(Scalar))
- return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, DAG.getUNDEF(VT),
+ return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT, Passthru,
DAG.getConstant(0, DL, XLenVT), VL);
// Otherwise use the more complicated splatting algorithm.
- return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
-}
-
-// Is the mask a slidedown that shifts in undefs.
-static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
- int Size = Mask.size();
-
- // Elements shifted in should be undef.
- auto CheckUndefs = [&](int Shift) {
- for (int i = Size - Shift; i != Size; ++i)
- if (Mask[i] >= 0)
- return false;
- return true;
- };
-
- // Elements should be shifted or undef.
- auto MatchShift = [&](int Shift) {
- for (int i = 0; i != Size - Shift; ++i)
- if (Mask[i] >= 0 && Mask[i] != Shift + i)
- return false;
- return true;
- };
-
- // Try all possible shifts.
- for (int Shift = 1; Shift != Size; ++Shift)
- if (CheckUndefs(Shift) && MatchShift(Shift))
- return Shift;
-
- // No match.
- return -1;
+ return splatSplitI64WithVL(DL, VT, Passthru, Scalar, VL, DAG);
}
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
const RISCVSubtarget &Subtarget) {
// We need to be able to widen elements to the next larger integer type.
- if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
+ if (VT.getScalarSizeInBits() >= Subtarget.getELEN())
return false;
int Size = Mask.size();
@@ -2430,6 +2446,79 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
return true;
}
+/// Match shuffles that concatenate two vectors, rotate the concatenation,
+/// and then extract the original number of elements from the rotated result.
+/// This is equivalent to vector.splice or X86's PALIGNR instruction. The
+/// returned rotation amount is for a rotate right, where elements move from
+/// higher elements to lower elements. \p LoSrc indicates the first source
+/// vector of the rotate or -1 for undef. \p HiSrc indicates the second vector
+/// of the rotate or -1 for undef. At least one of \p LoSrc and \p HiSrc will be
+/// 0 or 1 if a rotation is found.
+///
+/// NOTE: We talk about rotate to the right which matches how bit shift and
+/// rotate instructions are described where LSBs are on the right, but LLVM IR
+/// and the table below write vectors with the lowest elements on the left.
+static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
+ int Size = Mask.size();
+
+ // We need to detect various ways of spelling a rotation:
+ // [11, 12, 13, 14, 15, 0, 1, 2]
+ // [-1, 12, 13, 14, -1, -1, 1, -1]
+ // [-1, -1, -1, -1, -1, -1, 1, 2]
+ // [ 3, 4, 5, 6, 7, 8, 9, 10]
+ // [-1, 4, 5, 6, -1, -1, 9, -1]
+ // [-1, 4, 5, 6, -1, -1, -1, -1]
+ int Rotation = 0;
+ LoSrc = -1;
+ HiSrc = -1;
+ for (int i = 0; i != Size; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+
+ // Determine where a rotate vector would have started.
+ int StartIdx = i - (M % Size);
+ // The identity rotation isn't interesting, stop.
+ if (StartIdx == 0)
+ return -1;
+
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, it must be how much of the
+ // head.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ // The rotations don't match, so we can't match this mask.
+ return -1;
+
+ // Compute which value this mask is pointing at.
+ int MaskSrc = M < Size ? 0 : 1;
+
+ // Compute which of the two target values this index should be assigned to.
+    // This reflects whether the high elements are remaining or the low elements
+ // are remaining.
+ int &TargetSrc = StartIdx < 0 ? HiSrc : LoSrc;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (TargetSrc < 0)
+ TargetSrc = MaskSrc;
+ else if (TargetSrc != MaskSrc)
+ // This may be a rotation, but it pulls from the inputs in some
+ // unsupported interleaving.
+ return -1;
+ }
+
+ // Check that we successfully analyzed the mask, and normalize the results.
+ assert(Rotation != 0 && "Failed to locate a viable rotation!");
+ assert((LoSrc >= 0 || HiSrc >= 0) &&
+ "Failed to find a rotated input vector!");
+
+ return Rotation;
+}
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
@@ -2506,33 +2595,59 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
unsigned Opc =
VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
- SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
+ SDValue Splat =
+ DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), V, VL);
return convertFromScalableVector(VT, Splat, DAG, Subtarget);
}
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
assert(Lane < (int)NumElts && "Unexpected lane!");
- SDValue Gather =
- DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
- DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
+ SDValue Gather = DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT,
+ V1, DAG.getConstant(Lane, DL, XLenVT),
+ TrueMask, DAG.getUNDEF(ContainerVT), VL);
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
}
ArrayRef<int> Mask = SVN->getMask();
- // Try to match as a slidedown.
- int SlideAmt = matchShuffleAsSlideDown(Mask);
- if (SlideAmt >= 0) {
- // TODO: Should we reduce the VL to account for the upper undef elements?
- // Requires additional vsetvlis, but might be faster to execute.
- V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
- SDValue SlideDown =
- DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), V1,
- DAG.getConstant(SlideAmt, DL, XLenVT),
- TrueMask, VL);
- return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
+ // Lower rotations to a SLIDEDOWN and a SLIDEUP. One of the source vectors may
+  // be undef, which can be handled with a single SLIDEDOWN/UP.
+ int LoSrc, HiSrc;
+ int Rotation = isElementRotate(LoSrc, HiSrc, Mask);
+ if (Rotation > 0) {
+ SDValue LoV, HiV;
+ if (LoSrc >= 0) {
+ LoV = LoSrc == 0 ? V1 : V2;
+ LoV = convertToScalableVector(ContainerVT, LoV, DAG, Subtarget);
+ }
+ if (HiSrc >= 0) {
+ HiV = HiSrc == 0 ? V1 : V2;
+ HiV = convertToScalableVector(ContainerVT, HiV, DAG, Subtarget);
+ }
+
+ // We found a rotation. We need to slide HiV down by Rotation. Then we need
+ // to slide LoV up by (NumElts - Rotation).
+ unsigned InvRotate = NumElts - Rotation;
+
+ SDValue Res = DAG.getUNDEF(ContainerVT);
+ if (HiV) {
+ // If we are doing a SLIDEDOWN+SLIDEUP, reduce the VL for the SLIDEDOWN.
+ // FIXME: If we are only doing a SLIDEDOWN, don't reduce the VL as it
+ // causes multiple vsetvlis in some test cases such as lowering
+ // reduce.mul
+ SDValue DownVL = VL;
+ if (LoV)
+ DownVL = DAG.getConstant(InvRotate, DL, XLenVT);
+ Res =
+ DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT, Res, HiV,
+ DAG.getConstant(Rotation, DL, XLenVT), TrueMask, DownVL);
+ }
+ if (LoV)
+ Res = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Res, LoV,
+ DAG.getConstant(InvRotate, DL, XLenVT), TrueMask, VL);
+
+ return convertFromScalableVector(VT, Res, DAG, Subtarget);
}
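  // Worked example (illustrative): a single-source <8 x i32> shuffle with mask
  // [6,7,0,1,2,3,4,5] yields Rotation = 6 and LoSrc = HiSrc = 0, so we emit:
  //   vslidedown by 6 with VL = 2  (elements 6 and 7 land in lanes 0-1)
  //   vslideup   by 2 with full VL (elements 0..5 land in lanes 2..7)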
// Detect an interleave shuffle and lower to
@@ -2576,18 +2691,17 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// Freeze V2 since we use it twice and we need to be sure that the add and
// multiply see the same value.
- V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
+ V2 = DAG.getFreeze(V2);
// Recreate TrueMask using the widened type's element count.
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
- TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ TrueMask = getAllOnesMask(HalfContainerVT, VL, DL, DAG);
// Widen V1 and V2 with 0s and add one copy of V2 to V1.
SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
V2, TrueMask, VL);
// Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
+ DAG.getUNDEF(IntHalfVT),
DAG.getAllOnesConstant(DL, XLenVT));
SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
V2, Multiplier, TrueMask, VL);
@@ -2691,7 +2805,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// TODO: This doesn't trigger for i64 vectors on RV32, since there we
// encounter a bitcasted BUILD_VECTOR with low/high i32 values.
if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
- Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
+ Gather = lowerScalarSplat(SDValue(), SplatValue, VL, ContainerVT, DL, DAG,
+ Subtarget);
} else {
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
// If only one index is used, we can use a "splat" vrgather.
@@ -2699,16 +2814,16 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// that's beneficial.
if (LHSIndexCounts.size() == 1) {
int SplatIndex = LHSIndexCounts.begin()->getFirst();
- Gather =
- DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
- DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+ Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V1,
+ DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask,
+ DAG.getUNDEF(ContainerVT), VL);
} else {
SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
LHSIndices =
convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V1, LHSIndices,
- TrueMask, VL);
+ TrueMask, DAG.getUNDEF(ContainerVT), VL);
}
}
@@ -2716,45 +2831,46 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
// additional vrgather.
if (!V2.isUndef()) {
V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+
+ MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
+ SelectMask =
+ convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
+
// If only one index is used, we can use a "splat" vrgather.
// TODO: We can splat the most-common index and fix-up any stragglers, if
// that's beneficial.
if (RHSIndexCounts.size() == 1) {
int SplatIndex = RHSIndexCounts.begin()->getFirst();
- V2 = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
- DAG.getConstant(SplatIndex, DL, XLenVT), TrueMask, VL);
+ Gather = DAG.getNode(GatherVXOpc, DL, ContainerVT, V2,
+ DAG.getConstant(SplatIndex, DL, XLenVT), SelectMask,
+ Gather, VL);
} else {
SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
RHSIndices =
convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
- V2 = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices, TrueMask,
- VL);
+ Gather = DAG.getNode(GatherVVOpc, DL, ContainerVT, V2, RHSIndices,
+ SelectMask, Gather, VL);
}
-
- MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
- SelectMask =
- convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
-
- Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
- Gather, VL);
}
return convertFromScalableVector(VT, Gather, DAG, Subtarget);
}
-static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
- SDLoc DL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- if (VT.isScalableVector())
- return DAG.getFPExtendOrRound(Op, DL, VT);
- assert(VT.isFixedLengthVector() &&
- "Unexpected value type for RVV FP extend/round lowering");
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
- ? RISCVISD::FP_EXTEND_VL
- : RISCVISD::FP_ROUND_VL;
- return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
+bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
+ // Support splats for any type. These should type legalize well.
+ if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
+ return true;
+
+ // Only support legal VTs for other shuffles for now.
+ if (!isTypeLegal(VT))
+ return false;
+
+ MVT SVT = VT.getSimpleVT();
+
+ bool SwapSources;
+ int LoSrc, HiSrc;
+ return (isElementRotate(LoSrc, HiSrc, M) > 0) ||
+ isInterleaveShuffle(M, SVT, SwapSources, Subtarget);
}
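// (Editorial note: DAGCombiner consults this hook before forming new
// VECTOR_SHUFFLE nodes, so declaring rotations and interleaves legal here
// lets combines create masks that the lowering above can actually handle.)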
// Lower CTLZ_ZERO_UNDEF or CTTZ_ZERO_UNDEF by converting to FP and extracting
@@ -2868,6 +2984,32 @@ SDValue RISCVTargetLowering::expandUnalignedRVVStore(SDValue Op,
Store->getMemOperand()->getFlags());
}
+static SDValue lowerConstant(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(Op.getValueType() == MVT::i64 && "Unexpected VT");
+
+ int64_t Imm = cast<ConstantSDNode>(Op)->getSExtValue();
+
+ // All simm32 constants should be handled by isel.
+ // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
+  // this check redundant, but small immediates are common, so checking them
+  // first improves compile time.
+ if (isInt<32>(Imm))
+ return Op;
+
+ // We only need to cost the immediate, if constant pool lowering is enabled.
+ if (!Subtarget.useConstantPoolForLargeInts())
+ return Op;
+
+ RISCVMatInt::InstSeq Seq =
+ RISCVMatInt::generateInstSeq(Imm, Subtarget.getFeatureBits());
+ if (Seq.size() <= Subtarget.getMaxBuildIntsCost())
+ return Op;
+
+ // Expand to a constant pool using the default expansion code.
+ return SDValue();
+}
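+// For example (a sketch, assuming the default build-ints cost threshold): on
+// RV64, a constant such as 0x1234567812345678 needs a multi-instruction
+// lui/addi/slli sequence; once RISCVMatInt reports more instructions than
+// getMaxBuildIntsCost(), returning SDValue() lets the default expansion emit
+// a constant pool load instead.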
+
SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
@@ -2883,6 +3025,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerJumpTable(Op, DAG);
case ISD::GlobalTLSAddress:
return lowerGlobalTLSAddress(Op, DAG);
+ case ISD::Constant:
+ return lowerConstant(Op, DAG, Subtarget);
case ISD::SELECT:
return lowerSELECT(Op, DAG);
case ISD::BRCOND:
@@ -2905,6 +3049,30 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
SDValue Op0 = Op.getOperand(0);
EVT Op0VT = Op0.getValueType();
MVT XLenVT = Subtarget.getXLenVT();
+ if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
+ SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
+ return FPConv;
+ }
+ if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
+ Subtarget.hasStdExtF()) {
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
+ SDValue FPConv =
+ DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
+ return FPConv;
+ }
+
+ // Consider other scalar<->scalar casts as legal if the types are legal.
+ // Otherwise expand them.
+ if (!VT.isVector() && !Op0VT.isVector()) {
+ if (isTypeLegal(VT) && isTypeLegal(Op0VT))
+ return Op;
+ return SDValue();
+ }
+
+ assert(!VT.isScalableVector() && !Op0VT.isScalableVector() &&
+ "Unexpected types");
+
if (VT.isFixedLengthVector()) {
// We can handle fixed length vector bitcasts with a simple replacement
// in isel.
@@ -2934,18 +3102,6 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
DAG.getConstant(0, DL, XLenVT));
}
- if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
- SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
- SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
- return FPConv;
- }
- if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
- Subtarget.hasStdExtF()) {
- SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
- SDValue FPConv =
- DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
- return FPConv;
- }
return SDValue();
}
case ISD::INTRINSIC_WO_CHAIN:
@@ -3002,55 +3158,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
return DAG.getNode(Opc, DL, VT, Op0, Op1, ShAmt);
}
- case ISD::TRUNCATE: {
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
+ case ISD::TRUNCATE:
// Only custom-lower vector truncates
- if (!VT.isVector())
+ if (!Op.getSimpleValueType().isVector())
return Op;
-
- // Truncates to mask types are handled differently
- if (VT.getVectorElementType() == MVT::i1)
- return lowerVectorMaskTrunc(Op, DAG);
-
- // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
- // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
- // truncate by one power of two at a time.
- MVT DstEltVT = VT.getVectorElementType();
-
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
- MVT SrcEltVT = SrcVT.getVectorElementType();
-
- assert(DstEltVT.bitsLT(SrcEltVT) &&
- isPowerOf2_64(DstEltVT.getSizeInBits()) &&
- isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
- "Unexpected vector truncate lowering");
-
- MVT ContainerVT = SrcVT;
- if (SrcVT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(SrcVT);
- Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
- }
-
- SDValue Result = Src;
- SDValue Mask, VL;
- std::tie(Mask, VL) =
- getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
- LLVMContext &Context = *DAG.getContext();
- const ElementCount Count = ContainerVT.getVectorElementCount();
- do {
- SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
- EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
- Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
- Mask, VL);
- } while (SrcEltVT != DstEltVT);
-
- if (SrcVT.isFixedLengthVector())
- Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
-
- return Result;
- }
+ return lowerVectorTruncLike(Op, DAG);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
if (Op.getOperand(0).getValueType().isVector() &&
@@ -3076,28 +3188,26 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
// vscale as VLENB / 8.
static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
- if (Subtarget.getMinVLen() < RISCV::RVVBitsPerBlock)
+ if (Subtarget.getRealMinVLen() < RISCV::RVVBitsPerBlock)
report_fatal_error("Support for VLEN==32 is incomplete.");
- if (isa<ConstantSDNode>(Op.getOperand(0))) {
- // We assume VLENB is a multiple of 8. We manually choose the best shift
- // here because SimplifyDemandedBits isn't always able to simplify it.
- uint64_t Val = Op.getConstantOperandVal(0);
- if (isPowerOf2_64(Val)) {
- uint64_t Log2 = Log2_64(Val);
- if (Log2 < 3)
- return DAG.getNode(ISD::SRL, DL, VT, VLENB,
- DAG.getConstant(3 - Log2, DL, VT));
- if (Log2 > 3)
- return DAG.getNode(ISD::SHL, DL, VT, VLENB,
- DAG.getConstant(Log2 - 3, DL, VT));
- return VLENB;
- }
- // If the multiplier is a multiple of 8, scale it down to avoid needing
- // to shift the VLENB value.
- if ((Val % 8) == 0)
- return DAG.getNode(ISD::MUL, DL, VT, VLENB,
- DAG.getConstant(Val / 8, DL, VT));
- }
+ // We assume VLENB is a multiple of 8. We manually choose the best shift
+ // here because SimplifyDemandedBits isn't always able to simplify it.
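+    // E.g. (illustrative): vscale x 4 is VLENB * 4/8, a single srli of VLENB
+    // by 1; vscale x 32 is a single slli by 2; and vscale x 24 falls through
+    // to the multiple-of-8 case below as VLENB * 3.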
+ uint64_t Val = Op.getConstantOperandVal(0);
+ if (isPowerOf2_64(Val)) {
+ uint64_t Log2 = Log2_64(Val);
+ if (Log2 < 3)
+ return DAG.getNode(ISD::SRL, DL, VT, VLENB,
+ DAG.getConstant(3 - Log2, DL, VT));
+ if (Log2 > 3)
+ return DAG.getNode(ISD::SHL, DL, VT, VLENB,
+ DAG.getConstant(Log2 - 3, DL, VT));
+ return VLENB;
+ }
+ // If the multiplier is a multiple of 8, scale it down to avoid needing
+ // to shift the VLENB value.
+ if ((Val % 8) == 0)
+ return DAG.getNode(ISD::MUL, DL, VT, VLENB,
+ DAG.getConstant(Val / 8, DL, VT));
SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
DAG.getConstant(3, DL, VT));
@@ -3117,88 +3227,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
return SDValue();
}
- case ISD::FP_EXTEND: {
- // RVV can only do fp_extend to types double the size as the source. We
- // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
- // via f32.
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
-
- // Prepare any fixed-length vector operands.
- MVT ContainerVT = VT;
- if (SrcVT.isFixedLengthVector()) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- MVT SrcContainerVT =
- ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
- Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
- }
-
- if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
- SrcVT.getVectorElementType() != MVT::f16) {
- // For scalable vectors, we only need to close the gap between
- // vXf16->vXf64.
- if (!VT.isFixedLengthVector())
- return Op;
- // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
- Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
- return convertFromScalableVector(VT, Src, DAG, Subtarget);
- }
-
- MVT InterVT = VT.changeVectorElementType(MVT::f32);
- MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
- SDValue IntermediateExtend = getRVVFPExtendOrRound(
- Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
-
- SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
- DL, DAG, Subtarget);
- if (VT.isFixedLengthVector())
- return convertFromScalableVector(VT, Extend, DAG, Subtarget);
- return Extend;
- }
- case ISD::FP_ROUND: {
- // RVV can only do fp_round to types half the size as the source. We
- // custom-lower f64->f16 rounds via RVV's round-to-odd float
- // conversion instruction.
- SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
- SDValue Src = Op.getOperand(0);
- MVT SrcVT = Src.getSimpleValueType();
-
- // Prepare any fixed-length vector operands.
- MVT ContainerVT = VT;
- if (VT.isFixedLengthVector()) {
- MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
- ContainerVT =
- SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
- Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
- }
-
- if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
- SrcVT.getVectorElementType() != MVT::f64) {
- // For scalable vectors, we only need to close the gap between
- // vXf64<->vXf16.
- if (!VT.isFixedLengthVector())
- return Op;
- // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
- Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
- return convertFromScalableVector(VT, Src, DAG, Subtarget);
- }
-
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
-
- MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
- SDValue IntermediateRound =
- DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
- SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
- DL, DAG, Subtarget);
-
- if (VT.isFixedLengthVector())
- return convertFromScalableVector(VT, Round, DAG, Subtarget);
- return Round;
- }
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ if (!Op.getValueType().isVector())
+ return Op;
+ return lowerVectorFPExtendOrRoundLike(Op, DAG);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
@@ -3221,10 +3254,10 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
bool IsInt2FP = SrcEltVT.isInteger();
// Widening conversions
- if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
+ if (EltSize > (2 * SrcEltSize)) {
if (IsInt2FP) {
// Do a regular integer sign/zero extension then convert to float.
- MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
+ MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize),
VT.getVectorElementCount());
unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
? ISD::ZERO_EXTEND
@@ -3242,7 +3275,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
}
// Narrowing conversions
- if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
+ if (SrcEltSize > (2 * EltSize)) {
if (IsInt2FP) {
// One narrowing int_to_fp, then an fp_round.
assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
@@ -3253,9 +3286,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
// FP2Int
// One narrowing fp_to_int, then truncate the integer. If the float isn't
// representable by the integer, the result is poison.
- MVT IVecVT =
- MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
- VT.getVectorElementCount());
+ MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+ VT.getVectorElementCount());
SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
}
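    // Illustrative examples of the thresholds above: i8 -> f32 (a 4x widening)
    // becomes a sext/zext to i32 followed by one convert, while f64 -> i8 (an
    // 8x narrowing) becomes one fp-to-int to i32 followed by an ISD::TRUNCATE;
    // the RVV conversion instructions themselves only bridge a 2x element gap.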
@@ -3309,6 +3341,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FCEIL:
case ISD::FFLOOR:
return lowerFTRUNC_FCEIL_FFLOOR(Op, DAG);
+ case ISD::FROUND:
+ return lowerFROUND(Op, DAG);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_UMAX:
case ISD::VECREDUCE_SMAX:
@@ -3350,12 +3384,14 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerSTEP_VECTOR(Op, DAG);
case ISD::VECTOR_REVERSE:
return lowerVECTOR_REVERSE(Op, DAG);
+ case ISD::VECTOR_SPLICE:
+ return lowerVECTOR_SPLICE(Op, DAG);
case ISD::BUILD_VECTOR:
return lowerBUILD_VECTOR(Op, DAG, Subtarget);
case ISD::SPLAT_VECTOR:
if (Op.getValueType().getVectorElementType() == MVT::i1)
return lowerVectorMaskSplat(Op, DAG);
- return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
+ return SDValue();
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
case ISD::CONCAT_VECTORS: {
@@ -3455,7 +3491,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
case ISD::FSQRT:
return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
case ISD::FMA:
- return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
+ return lowerToScalableOp(Op, DAG, RISCVISD::VFMADD_VL);
case ISD::SMIN:
return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
case ISD::SMAX:
@@ -3487,6 +3523,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerGET_ROUNDING(Op, DAG);
case ISD::SET_ROUNDING:
return lowerSET_ROUNDING(Op, DAG);
+ case ISD::EH_DWARF_CFA:
+ return lowerEH_DWARF_CFA(Op, DAG);
case ISD::VP_SELECT:
return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
case ISD::VP_MERGE:
@@ -3525,6 +3563,35 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return lowerVPOp(Op, DAG, RISCVISD::FMUL_VL);
case ISD::VP_FDIV:
return lowerVPOp(Op, DAG, RISCVISD::FDIV_VL);
+ case ISD::VP_FNEG:
+ return lowerVPOp(Op, DAG, RISCVISD::FNEG_VL);
+ case ISD::VP_FMA:
+ return lowerVPOp(Op, DAG, RISCVISD::VFMADD_VL);
+ case ISD::VP_SIGN_EXTEND:
+ case ISD::VP_ZERO_EXTEND:
+ if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
+ return lowerVPExtMaskOp(Op, DAG);
+ return lowerVPOp(Op, DAG,
+ Op.getOpcode() == ISD::VP_SIGN_EXTEND
+ ? RISCVISD::VSEXT_VL
+ : RISCVISD::VZEXT_VL);
+ case ISD::VP_TRUNCATE:
+ return lowerVectorTruncLike(Op, DAG);
+ case ISD::VP_FP_EXTEND:
+ case ISD::VP_FP_ROUND:
+ return lowerVectorFPExtendOrRoundLike(Op, DAG);
+ case ISD::VP_FPTOSI:
+ return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_SINT_VL);
+ case ISD::VP_FPTOUI:
+ return lowerVPFPIntConvOp(Op, DAG, RISCVISD::FP_TO_UINT_VL);
+ case ISD::VP_SITOFP:
+ return lowerVPFPIntConvOp(Op, DAG, RISCVISD::SINT_TO_FP_VL);
+ case ISD::VP_UITOFP:
+ return lowerVPFPIntConvOp(Op, DAG, RISCVISD::UINT_TO_FP_VL);
+ case ISD::VP_SETCC:
+ if (Op.getOperand(0).getSimpleValueType().getVectorElementType() == MVT::i1)
+ return lowerVPSetCCMaskOp(Op, DAG);
+ return lowerVPOp(Op, DAG, RISCVISD::SETCC_VL);
}
}
@@ -3562,12 +3629,21 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// Use PC-relative addressing to access the symbol. This generates the
// pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
// %pcrel_lo(auipc)).
- return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
+ return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
// Use PC-relative addressing to access the GOT for this symbol, then load
// the address from the GOT. This generates the pattern (PseudoLA sym),
// which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
- return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
+ SDValue Load =
+ DAG.getMemIntrinsicNode(RISCVISD::LA, DL, DAG.getVTList(Ty, MVT::Other),
+ {DAG.getEntryNode(), Addr}, Ty, MemOp);
+ return Load;
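+    // (Editorial note: modelling the GOT access as a real load with an
+    // invariant, dereferenceable MachineMemOperand -- instead of an opaque
+    // pseudo -- allows generic passes such as MachineLICM to hoist or CSE it.)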
}
switch (getTargetMachine().getCodeModel()) {
@@ -3578,15 +3654,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
// address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
- return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
+ SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
+ return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNHi, AddrLo);
}
case CodeModel::Medium: {
// Generate a sequence for accessing addresses within any 2GiB range within
// the address space. This generates the pattern (PseudoLLA sym), which
// expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
- return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
+ return DAG.getNode(RISCVISD::LLA, DL, Ty, Addr);
}
}
}
@@ -3594,23 +3670,12 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
- int64_t Offset = N->getOffset();
- MVT XLenVT = Subtarget.getXLenVT();
+ assert(N->getOffset() == 0 && "unexpected offset in global node");
const GlobalValue *GV = N->getGlobal();
bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
- SDValue Addr = getAddr(N, DAG, IsLocal);
-
- // In order to maximise the opportunity for common subexpression elimination,
- // emit a separate ADD node for the global address offset instead of folding
- // it in the global address node. Later peephole optimisations may choose to
- // fold it back in when profitable.
- if (Offset != 0)
- return DAG.getNode(ISD::ADD, DL, Ty, Addr,
- DAG.getConstant(Offset, DL, XLenVT));
- return Addr;
+ return getAddr(N, DAG, IsLocal);
}
SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@@ -3648,8 +3713,15 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
// the pattern (PseudoLA_TLS_IE sym), which expands to
// (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
- SDValue Load =
- SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MemOp = MF.getMachineMemOperand(
+ MachinePointerInfo::getGOT(MF),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ LLT(Ty.getSimpleVT()), Align(Ty.getFixedSizeInBits() / 8));
+ SDValue Load = DAG.getMemIntrinsicNode(
+ RISCVISD::LA_TLS_IE, DL, DAG.getVTList(Ty, MVT::Other),
+ {DAG.getEntryNode(), Addr}, Ty, MemOp);
// Add the thread pointer.
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
@@ -3667,12 +3739,11 @@ SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
SDValue AddrLo =
DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);
- SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
+ SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
- SDValue MNAdd = SDValue(
- DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
- 0);
- return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
+ SDValue MNAdd =
+ DAG.getNode(RISCVISD::ADD_TPREL, DL, Ty, MNHi, TPReg, AddrAdd);
+ return DAG.getNode(RISCVISD::ADD_LO, DL, Ty, MNAdd, AddrLo);
}
SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
@@ -3686,8 +3757,7 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
// This generates the pattern (PseudoLA_TLS_GD sym), which expands to
// (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
- SDValue Load =
- SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);
+ SDValue Load = DAG.getNode(RISCVISD::LA_TLS_GD, DL, Ty, Addr);
// Prepare argument list to generate call.
ArgListTy Args;
@@ -3710,10 +3780,8 @@ SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
- EVT Ty = Op.getValueType();
GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
- int64_t Offset = N->getOffset();
- MVT XLenVT = Subtarget.getXLenVT();
+ assert(N->getOffset() == 0 && "unexpected offset in global node");
TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
@@ -3735,13 +3803,6 @@ SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
break;
}
- // In order to maximise the opportunity for common subexpression elimination,
- // emit a separate ADD node for the global address offset instead of folding
- // it in the global address node. Later peephole optimisations may choose to
- // fold it back in when profitable.
- if (Offset != 0)
- return DAG.getNode(ISD::ADD, DL, Ty, Addr,
- DAG.getConstant(Offset, DL, XLenVT));
return Addr;
}
@@ -3911,7 +3972,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
// if Shamt-XLEN < 0: // Shamt < XLEN
// Lo = Lo << Shamt
- // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
+ // Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 ^ Shamt))
// else:
// Lo = 0
// Hi = Lo << (Shamt-XLEN)
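  //
  // (Editorial note on the XOR form: for 0 <= Shamt < XLEN with XLEN a power
  // of two, XLEN-1 is an all-ones low-bit mask, so Shamt ^ (XLEN-1) equals
  // XLEN-1 - Shamt with no borrows. RISC-V has no immediate-minus-register
  // subtract, so the XOR lowers to a single xori instead of materialising
  // XLEN-1 in a register for a sub.)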
@@ -3921,7 +3982,7 @@ SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
- SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+ SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
@@ -3950,7 +4011,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
// SRA expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+  //   Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ XLEN-1))
// Hi = Hi >>s Shamt
// else:
// Lo = Hi >>s (Shamt-XLEN);
@@ -3958,7 +4019,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
//
// SRL expansion:
// if Shamt-XLEN < 0: // Shamt < XLEN
- // Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
+  //   Lo = (Lo >>u Shamt) | ((Hi << 1) << (Shamt ^ XLEN-1))
// Hi = Hi >>u Shamt
// else:
// Lo = Hi >>u (Shamt-XLEN);
@@ -3971,7 +4032,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
- SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);
+ SDValue XLenMinus1Shamt = DAG.getNode(ISD::XOR, DL, VT, Shamt, XLenMinus1);
SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
@@ -4022,7 +4083,7 @@ SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
-// them to SPLAT_VECTOR_I64
+// them to VMV_V_X_VL.
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -4041,7 +4102,8 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
std::tie(Mask, VL) =
getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
+ SDValue Res =
+ splatPartsI64WithVL(DL, ContainerVT, SDValue(), Lo, Hi, VL, DAG);
return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
}
@@ -4051,18 +4113,21 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
// If Hi constant is all the same sign bit as Lo, lower this as a custom
// node in order to try and match RVV vector/scalar instructions.
if ((LoC >> 31) == HiC)
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
+ Lo, DAG.getRegister(RISCV::X0, MVT::i32));
}
// Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
isa<ConstantSDNode>(Hi.getOperand(1)) &&
Hi.getConstantOperandVal(1) == 31)
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
+ return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT), Lo,
+ DAG.getRegister(RISCV::X0, MVT::i32));
// Fall back to use a stack store and stride x0 vector load. Use X0 as VL.
- return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT, Lo, Hi,
- DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, MVT::i64));
+ return DAG.getNode(RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL, DL, VecVT,
+ DAG.getUNDEF(VecVT), Lo, Hi,
+ DAG.getRegister(RISCV::X0, MVT::i32));
}
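// Illustrative cases (editorial) for an RV32 v*i64 splat of parts {Lo, Hi}:
// if Hi is a constant equal to the sign bits of a constant Lo, or Hi is
// (sra Lo, 31), a single vmv.v.x of Lo at SEW=64 sign-extends correctly;
// otherwise the parts are stored to the stack and reloaded with a
// stride-zero vlse64.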
// Custom-lower extensions from mask vectors by using a vselect either with 1
@@ -4078,27 +4143,9 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
assert(Src.getValueType().isVector() &&
Src.getValueType().getVectorElementType() == MVT::i1);
- MVT XLenVT = Subtarget.getXLenVT();
- SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
- SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
-
if (VecVT.isScalableVector()) {
- // Be careful not to introduce illegal scalar types at this stage, and be
- // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
- // illegal and must be expanded. Since we know that the constants are
- // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
- bool IsRV32E64 =
- !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;
-
- if (!IsRV32E64) {
- SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
- SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
- } else {
- SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
- SplatTrueVal =
- DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
- }
-
+ SDValue SplatZero = DAG.getConstant(0, DL, VecVT);
+ SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, VecVT);
return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
}
@@ -4111,9 +4158,14 @@ SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
- SplatTrueVal =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
+ SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);
+
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatZero, VL);
+ SplatTrueVal = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatTrueVal, VL);
SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
SplatTrueVal, SplatZero, VL);
@@ -4151,8 +4203,9 @@ SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
// (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
-SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
- SelectionDAG &DAG) const {
+SDValue RISCVTargetLowering::lowerVectorMaskTruncLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
SDLoc DL(Op);
EVT MaskVT = Op.getValueType();
// Only expect to custom-lower truncations to mask types
@@ -4160,34 +4213,176 @@ SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
"Unexpected type for vector mask lowering");
SDValue Src = Op.getOperand(0);
MVT VecVT = Src.getSimpleValueType();
-
+ SDValue Mask, VL;
+ if (IsVPTrunc) {
+ Mask = Op.getOperand(1);
+ VL = Op.getOperand(2);
+ }
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
+
if (VecVT.isFixedLengthVector()) {
ContainerVT = getContainerForFixedLengthVector(VecVT);
Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ if (IsVPTrunc) {
+ MVT MaskContainerVT =
+ getContainerForFixedLengthVector(Mask.getSimpleValueType());
+ Mask = convertToScalableVector(MaskContainerVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ if (!IsVPTrunc) {
+ std::tie(Mask, VL) =
+ getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
}
SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
- SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
- SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);
-
- if (VecVT.isScalableVector()) {
- SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
- return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
- }
-
- SDValue Mask, VL;
- std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
+ SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatOne, VL);
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatZero, VL);
MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
SDValue Trunc =
DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
DAG.getCondCode(ISD::SETNE), Mask, VL);
- return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
+ if (MaskVT.isFixedLengthVector())
+ Trunc = convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
+ return Trunc;
+}
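The AND_VL/SETCC_VL pair above computes the usual trunc-to-i1 semantics per lane. A scalar model (sketch, helper name invented):

#include <cstdint>

// Truncation to i1 keeps only bit 0 of each lane: (x & 1) != 0.
static bool truncLaneToMask(uint64_t Lane) { return (Lane & 1) != 0; }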
+
+SDValue RISCVTargetLowering::lowerVectorTruncLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsVPTrunc = Op.getOpcode() == ISD::VP_TRUNCATE;
+ SDLoc DL(Op);
+
+ MVT VT = Op.getSimpleValueType();
+ // Only custom-lower vector truncates
+ assert(VT.isVector() && "Unexpected type for vector truncate lowering");
+
+ // Truncates to mask types are handled differently
+ if (VT.getVectorElementType() == MVT::i1)
+ return lowerVectorMaskTruncLike(Op, DAG);
+
+ // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
+ // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
+ // truncate by one power of two at a time.
+ MVT DstEltVT = VT.getVectorElementType();
+
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ MVT SrcEltVT = SrcVT.getVectorElementType();
+
+ assert(DstEltVT.bitsLT(SrcEltVT) && isPowerOf2_64(DstEltVT.getSizeInBits()) &&
+ isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
+ "Unexpected vector truncate lowering");
+
+ MVT ContainerVT = SrcVT;
+ SDValue Mask, VL;
+ if (IsVPTrunc) {
+ Mask = Op.getOperand(1);
+ VL = Op.getOperand(2);
+ }
+ if (SrcVT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(SrcVT);
+ Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
+ if (IsVPTrunc) {
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ SDValue Result = Src;
+ if (!IsVPTrunc) {
+ std::tie(Mask, VL) =
+ getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
+ }
+
+ LLVMContext &Context = *DAG.getContext();
+ const ElementCount Count = ContainerVT.getVectorElementCount();
+ do {
+ SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
+ EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
+ Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
+ Mask, VL);
+ } while (SrcEltVT != DstEltVT);
+
+ if (SrcVT.isFixedLengthVector())
+ Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+ return Result;
+}
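The do/while loop above narrows by one power of two per iteration. A standalone model of the step count (sketch, assuming power-of-two element widths as asserted above):

// RVV truncates only SEW*2 -> SEW, so i64 -> i8 takes three
// TRUNCATE_VECTOR_VL nodes: 64 -> 32 -> 16 -> 8.
static unsigned truncateSteps(unsigned SrcEltBits, unsigned DstEltBits) {
  unsigned Steps = 0;
  while (SrcEltBits != DstEltBits) {
    SrcEltBits /= 2;
    ++Steps;
  }
  return Steps; // truncateSteps(64, 8) == 3
}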
+
+SDValue
+RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
+ SelectionDAG &DAG) const {
+ bool IsVP =
+ Op.getOpcode() == ISD::VP_FP_ROUND || Op.getOpcode() == ISD::VP_FP_EXTEND;
+ bool IsExtend =
+ Op.getOpcode() == ISD::VP_FP_EXTEND || Op.getOpcode() == ISD::FP_EXTEND;
+  // RVV can only truncate fp to types half the size of the source. We
+  // custom-lower f64->f16 rounds via RVV's round-to-odd float
+  // conversion instruction.
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ assert(VT.isVector() && "Unexpected type for vector truncate lowering");
+
+ SDValue Src = Op.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+
+ bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
+ SrcVT.getVectorElementType() != MVT::f16);
+ bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
+ SrcVT.getVectorElementType() != MVT::f64);
+
+ bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
+
+ // Prepare any fixed-length vector operands.
+ MVT ContainerVT = VT;
+ SDValue Mask, VL;
+ if (IsVP) {
+ Mask = Op.getOperand(1);
+ VL = Op.getOperand(2);
+ }
+ if (VT.isFixedLengthVector()) {
+ MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
+ ContainerVT =
+ SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
+ Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
+ if (IsVP) {
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+ }
+
+ if (!IsVP)
+ std::tie(Mask, VL) =
+ getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
+
+ unsigned ConvOpc = IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::FP_ROUND_VL;
+
+ if (IsDirectConv) {
+ Src = DAG.getNode(ConvOpc, DL, ContainerVT, Src, Mask, VL);
+ if (VT.isFixedLengthVector())
+ Src = convertFromScalableVector(VT, Src, DAG, Subtarget);
+ return Src;
+ }
+
+ unsigned InterConvOpc =
+ IsExtend ? RISCVISD::FP_EXTEND_VL : RISCVISD::VFNCVT_ROD_VL;
+
+ MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
+ SDValue IntermediateConv =
+ DAG.getNode(InterConvOpc, DL, InterVT, Src, Mask, VL);
+ SDValue Result =
+ DAG.getNode(ConvOpc, DL, ContainerVT, IntermediateConv, Mask, VL);
+ if (VT.isFixedLengthVector())
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+ return Result;
}
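The direct-vs-two-step choice above reduces to element widths. A hedged sketch (helper invented for illustration):

// A single vfwcvt/vfncvt step moves exactly one 2x width step, so only the
// f16 <-> f64 pairs need an f32 stopover. The narrowing stopover uses
// round-to-odd (vfncvt.rod) so the final f32 -> f16 rounding stays correct.
static bool needsF32Stopover(unsigned SrcEltBits, unsigned DstEltBits) {
  return (SrcEltBits == 16 && DstEltBits == 64) ||
         (SrcEltBits == 64 && DstEltBits == 16);
}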
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
@@ -4268,13 +4463,15 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
// undef doesn't obey the earlyclobber constraint. Just splat a zero value.
- ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
- InsertI64VL);
+ ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT,
+ DAG.getUNDEF(I32ContainerVT), Zero, InsertI64VL);
// First slide in the hi value, then the lo in underneath it.
- ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
- ValHi, I32Mask, InsertI64VL);
- ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
- ValLo, I32Mask, InsertI64VL);
+ ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
+ DAG.getUNDEF(I32ContainerVT), ValInVec, ValHi,
+ I32Mask, InsertI64VL);
+ ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT,
+ DAG.getUNDEF(I32ContainerVT), ValInVec, ValLo,
+ I32Mask, InsertI64VL);
// Bitcast back to the right container type.
ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
}
@@ -4310,7 +4507,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
unsigned WidenVecLen;
SDValue ExtractElementIdx;
SDValue ExtractBitIdx;
- unsigned MaxEEW = Subtarget.getMaxELENForFixedLengthVectors();
+ unsigned MaxEEW = Subtarget.getELEN();
MVT LargestEltVT = MVT::getIntegerVT(
std::min(MaxEEW, unsigned(XLenVT.getSizeInBits())));
if (NumElts <= LargestEltVT.getSizeInBits()) {
@@ -4360,8 +4557,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
if (!isNullConstant(Idx)) {
// Use a VL of 1 to avoid processing more elements than we need.
SDValue VL = DAG.getConstant(1, DL, XLenVT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
}
@@ -4378,8 +4574,8 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
-static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+static SDValue lowerVectorIntrinsicScalars(SDValue Op, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
"Unexpected opcode");
@@ -4393,10 +4589,10 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
- if (!II || !II->hasSplatOperand())
+ if (!II || !II->hasScalarOperand())
return SDValue();
- unsigned SplatOp = II->SplatOperand + 1 + HasChain;
+ unsigned SplatOp = II->ScalarOperand + 1 + HasChain;
assert(SplatOp < Op.getNumOperands());
SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
@@ -4426,28 +4622,141 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
// that a widening operation never uses SEW=64.
// NOTE: If this fails the below assert, we can probably just find the
// element count from any operand or result and use it to construct the VT.
- assert(II->SplatOperand > 0 && "Unexpected splat operand!");
+ assert(II->ScalarOperand > 0 && "Unexpected splat operand!");
MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();
// The more complex case is when the scalar is larger than XLenVT.
assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");
- // If this is a sign-extended 32-bit constant, we can truncate it and rely
- // on the instruction to sign-extend since SEW>XLEN.
- if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
- if (isInt<32>(CVal->getSExtValue())) {
- ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
- return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
+ // If this is a sign-extended 32-bit value, we can truncate it and rely on the
+ // instruction to sign-extend since SEW>XLEN.
+ if (DAG.ComputeNumSignBits(ScalarOp) > 32) {
+ ScalarOp = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, ScalarOp);
+ return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
+ }
+
+ switch (IntNo) {
+ case Intrinsic::riscv_vslide1up:
+ case Intrinsic::riscv_vslide1down:
+ case Intrinsic::riscv_vslide1up_mask:
+ case Intrinsic::riscv_vslide1down_mask: {
+ // We need to special case these when the scalar is larger than XLen.
+ unsigned NumOps = Op.getNumOperands();
+ bool IsMasked = NumOps == 7;
+
+ // Convert the vector source to the equivalent nxvXi32 vector.
+ MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
+ SDValue Vec = DAG.getBitcast(I32VT, Operands[2]);
+
+ SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
+ DAG.getConstant(0, DL, XLenVT));
+ SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, ScalarOp,
+ DAG.getConstant(1, DL, XLenVT));
+
+ // Double the VL since we halved SEW.
+ SDValue AVL = getVLOperand(Op);
+ SDValue I32VL;
+
+ // Optimize for constant AVL
+ if (isa<ConstantSDNode>(AVL)) {
+ unsigned EltSize = VT.getScalarSizeInBits();
+ unsigned MinSize = VT.getSizeInBits().getKnownMinValue();
+
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ unsigned MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
+
+ unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+ unsigned MinVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMin, EltSize, MinSize);
+
+ uint64_t AVLInt = cast<ConstantSDNode>(AVL)->getZExtValue();
+ if (AVLInt <= MinVLMAX) {
+ I32VL = DAG.getConstant(2 * AVLInt, DL, XLenVT);
+ } else if (AVLInt >= 2 * MaxVLMAX) {
+ // Just set vl to VLMAX in this situation
+ RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(I32VT);
+ SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
+ unsigned Sew = RISCVVType::encodeSEW(I32VT.getScalarSizeInBits());
+ SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
+ SDValue SETVLMAX = DAG.getTargetConstant(
+ Intrinsic::riscv_vsetvlimax_opt, DL, MVT::i32);
+ I32VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVLMAX, SEW,
+ LMUL);
+ } else {
+        // For AVL between (MinVLMAX, 2 * MaxVLMAX), the actual working vl
+        // depends on the hardware implementation.
+        // So let the following code handle it.
+ }
}
+ if (!I32VL) {
+ RISCVII::VLMUL Lmul = RISCVTargetLowering::getLMUL(VT);
+ SDValue LMUL = DAG.getConstant(Lmul, DL, XLenVT);
+ unsigned Sew = RISCVVType::encodeSEW(VT.getScalarSizeInBits());
+ SDValue SEW = DAG.getConstant(Sew, DL, XLenVT);
+ SDValue SETVL =
+ DAG.getTargetConstant(Intrinsic::riscv_vsetvli_opt, DL, MVT::i32);
+    // Use a vsetvli instruction to get the actual length in use, which
+    // depends on the hardware implementation.
+ SDValue VL = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, XLenVT, SETVL, AVL,
+ SEW, LMUL);
+ I32VL =
+ DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
+ }
+
+ SDValue I32Mask = getAllOnesMask(I32VT, I32VL, DL, DAG);
+
+ // Shift the two scalar parts in using SEW=32 slide1up/slide1down
+ // instructions.
+ SDValue Passthru;
+ if (IsMasked)
+ Passthru = DAG.getUNDEF(I32VT);
+ else
+ Passthru = DAG.getBitcast(I32VT, Operands[1]);
+
+ if (IntNo == Intrinsic::riscv_vslide1up ||
+ IntNo == Intrinsic::riscv_vslide1up_mask) {
+ Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
+ ScalarHi, I32Mask, I32VL);
+ Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Passthru, Vec,
+ ScalarLo, I32Mask, I32VL);
+ } else {
+ Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
+ ScalarLo, I32Mask, I32VL);
+ Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Passthru, Vec,
+ ScalarHi, I32Mask, I32VL);
+ }
+
+ // Convert back to nxvXi64.
+ Vec = DAG.getBitcast(VT, Vec);
+
+ if (!IsMasked)
+ return Vec;
+ // Apply mask after the operation.
+ SDValue Mask = Operands[NumOps - 3];
+ SDValue MaskedOff = Operands[1];
+ // Assume Policy operand is the last operand.
+ uint64_t Policy =
+ cast<ConstantSDNode>(Operands[NumOps - 1])->getZExtValue();
+ // We don't need to select maskedoff if it's undef.
+ if (MaskedOff.isUndef())
+ return Vec;
+    // TAMU: tail agnostic, mask undisturbed.
+ if (Policy == RISCVII::TAIL_AGNOSTIC)
+ return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff,
+ AVL);
+    // TUMA or TUMU: Currently we always emit tumu policy regardless of tuma.
+    // It's fine because vmerge does not care about mask policy.
+ return DAG.getNode(RISCVISD::VP_MERGE_VL, DL, VT, Mask, Vec, MaskedOff,
+ AVL);
+ }
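The constant-AVL fast path above relies on a VLMAX bound. A standalone model using the same formula as computeVLMAX in the VECTOR_REVERSE hunk further down (RVVBitsPerBlock assumed to be 64, matching RISCV::RVVBitsPerBlock):

// VLMAX = (VLEN / SEW) scaled by LMUL = MinSize / RVVBitsPerBlock. When
// AVL <= VLMAX for the minimum VLEN, vl == AVL is guaranteed, so the
// halved-SEW slide can simply use 2 * AVL without a vsetvli.
static unsigned modelVLMAX(unsigned VLenBits, unsigned EltSizeBits,
                           unsigned MinSizeBits) {
  const unsigned RVVBitsPerBlock = 64; // assumption, see lead-in
  return ((VLenBits / EltSizeBits) * MinSizeBits) / RVVBitsPerBlock;
}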
}
// We need to convert the scalar to a splat vector.
- // FIXME: Can we implicitly truncate the scalar if it is known to
- // be sign extended?
SDValue VL = getVLOperand(Op);
assert(VL.getValueType() == XLenVT);
- ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
+ ScalarOp = splatSplitI64WithVL(DL, VT, SDValue(), ScalarOp, VL, DAG);
return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}
@@ -4481,7 +4790,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_zip:
case Intrinsic::riscv_unzip: {
// Lower to the SHFLI encoding for zip or the UNSHFLI encoding for unzip.
- // For i32 the immdiate is 15. For i64 the immediate is 31.
+ // For i32 the immediate is 15. For i64 the immediate is 31.
unsigned Opc =
IntNo == Intrinsic::riscv_zip ? RISCVISD::SHFL : RISCVISD::UNSHFL;
unsigned BitWidth = Op.getValueSizeInBits();
@@ -4516,10 +4825,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Op.getOperand(1));
case Intrinsic::riscv_vmv_v_x:
return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
- Op.getSimpleValueType(), DL, DAG, Subtarget);
+ Op.getOperand(3), Op.getSimpleValueType(), DL, DAG,
+ Subtarget);
case Intrinsic::riscv_vfmv_v_f:
return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
- Op.getOperand(1), Op.getOperand(2));
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::riscv_vmv_s_x: {
SDValue Scalar = Op.getOperand(2);
@@ -4533,7 +4843,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// This is an i64 value that lives in two scalar registers. We have to
// insert this in a convoluted way. First we build vXi64 splat containing
- // the/ two values that we assemble using some bit math. Next we'll use
+ // the two values that we assemble using some bit math. Next we'll use
// vid.v and vmseq to build a mask with bit 0 set. Then we'll use that mask
// to merge element 0 from our splat into the source vector.
// FIXME: This is probably not the best way to do this, but it is
@@ -4550,12 +4860,15 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue Vec = Op.getOperand(1);
SDValue VL = getVLOperand(Op);
- SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
- SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
- DAG.getConstant(0, DL, MVT::i32), VL);
+ SDValue SplattedVal = splatSplitI64WithVL(DL, VT, SDValue(), Scalar, VL, DAG);
+ if (Op.getOperand(1).isUndef())
+ return SplattedVal;
+ SDValue SplattedIdx =
+ DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
+ DAG.getConstant(0, DL, MVT::i32), VL);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ MVT MaskVT = getMaskTypeFor(VT);
+ SDValue Mask = getAllOnesMask(VT, VL, DL, DAG);
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
SDValue SelectCond =
DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
@@ -4563,73 +4876,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
Vec, VL);
}
- case Intrinsic::riscv_vslide1up:
- case Intrinsic::riscv_vslide1down:
- case Intrinsic::riscv_vslide1up_mask:
- case Intrinsic::riscv_vslide1down_mask: {
- // We need to special case these when the scalar is larger than XLen.
- unsigned NumOps = Op.getNumOperands();
- bool IsMasked = NumOps == 7;
- unsigned OpOffset = IsMasked ? 1 : 0;
- SDValue Scalar = Op.getOperand(2 + OpOffset);
- if (Scalar.getValueType().bitsLE(XLenVT))
- break;
-
- // Splatting a sign extended constant is fine.
- if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
- if (isInt<32>(CVal->getSExtValue()))
- break;
-
- MVT VT = Op.getSimpleValueType();
- assert(VT.getVectorElementType() == MVT::i64 &&
- Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
-
- // Convert the vector source to the equivalent nxvXi32 vector.
- MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
- SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
-
- SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
- DAG.getConstant(0, DL, XLenVT));
- SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
- DAG.getConstant(1, DL, XLenVT));
-
- // Double the VL since we halved SEW.
- SDValue VL = getVLOperand(Op);
- SDValue I32VL =
- DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
-
- MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
- SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
-
- // Shift the two scalar parts in using SEW=32 slide1up/slide1down
- // instructions.
- if (IntNo == Intrinsic::riscv_vslide1up ||
- IntNo == Intrinsic::riscv_vslide1up_mask) {
- Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
- I32Mask, I32VL);
- Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
- I32Mask, I32VL);
- } else {
- Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
- I32Mask, I32VL);
- Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
- I32Mask, I32VL);
- }
-
- // Convert back to nxvXi64.
- Vec = DAG.getBitcast(VT, Vec);
-
- if (!IsMasked)
- return Vec;
-
- // Apply mask after the operation.
- SDValue Mask = Op.getOperand(NumOps - 3);
- SDValue MaskedOff = Op.getOperand(1);
- return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
- }
}
- return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
+ return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
@@ -4652,8 +4901,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SDValue PassThru = Op.getOperand(2);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
@@ -4688,9 +4936,48 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
return DAG.getMergeValues({Result, Chain}, DL);
}
+ case Intrinsic::riscv_seg2_load:
+ case Intrinsic::riscv_seg3_load:
+ case Intrinsic::riscv_seg4_load:
+ case Intrinsic::riscv_seg5_load:
+ case Intrinsic::riscv_seg6_load:
+ case Intrinsic::riscv_seg7_load:
+ case Intrinsic::riscv_seg8_load: {
+ SDLoc DL(Op);
+ static const Intrinsic::ID VlsegInts[7] = {
+ Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
+ Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
+ Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
+ Intrinsic::riscv_vlseg8};
+ unsigned NF = Op->getNumValues() - 1;
+ assert(NF >= 2 && NF <= 8 && "Unexpected seg number");
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VT = Op->getSimpleValueType(0);
+ MVT ContainerVT = getContainerForFixedLengthVector(VT);
+
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ SDValue IntID = DAG.getTargetConstant(VlsegInts[NF - 2], DL, XLenVT);
+ auto *Load = cast<MemIntrinsicSDNode>(Op);
+ SmallVector<EVT, 9> ContainerVTs(NF, ContainerVT);
+ ContainerVTs.push_back(MVT::Other);
+ SDVTList VTs = DAG.getVTList(ContainerVTs);
+ SmallVector<SDValue, 12> Ops = {Load->getChain(), IntID};
+ Ops.insert(Ops.end(), NF, DAG.getUNDEF(ContainerVT));
+ Ops.push_back(Op.getOperand(2));
+ Ops.push_back(VL);
+ SDValue Result =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ Load->getMemoryVT(), Load->getMemOperand());
+ SmallVector<SDValue, 9> Results;
+ for (unsigned int RetIdx = 0; RetIdx < NF; RetIdx++)
+ Results.push_back(convertFromScalableVector(VT, Result.getValue(RetIdx),
+ DAG, Subtarget));
+ Results.push_back(Result.getValue(NF));
+ return DAG.getMergeValues(Results, DL);
+ }
}
- return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
+ return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
@@ -4714,8 +5001,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
@@ -4898,8 +5184,9 @@ SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
SDValue NeutralElem =
DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
- SDValue IdentitySplat = lowerScalarSplat(
- NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
+ SDValue IdentitySplat =
+ lowerScalarSplat(SDValue(), NeutralElem, DAG.getConstant(1, DL, XLenVT),
+ M1VT, DL, DAG, Subtarget);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec,
IdentitySplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
@@ -4960,8 +5247,9 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
- SDValue ScalarSplat = lowerScalarSplat(
- ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget);
+ SDValue ScalarSplat =
+ lowerScalarSplat(SDValue(), ScalarVal, DAG.getConstant(1, DL, XLenVT),
+ M1VT, DL, DAG, Subtarget);
SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT),
VectorVal, ScalarSplat, Mask, VL);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
@@ -5027,9 +5315,9 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
MVT XLenVT = Subtarget.getXLenVT();
MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT;
- SDValue StartSplat =
- lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT,
- DL, DAG, Subtarget);
+ SDValue StartSplat = lowerScalarSplat(SDValue(), Op.getOperand(0),
+ DAG.getConstant(1, DL, XLenVT), M1VT,
+ DL, DAG, Subtarget);
SDValue Reduction =
DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL);
SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction,
@@ -5331,13 +5619,13 @@ SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
if (StepValImm != 1) {
if (isPowerOf2_64(StepValImm)) {
SDValue StepVal =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
+ DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
                      DAG.getConstant(Log2_64(StepValImm), DL, XLenVT), VL);
StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
} else {
SDValue StepVal = lowerScalarSplat(
- DAG.getConstant(StepValImm, DL, VT.getVectorElementType()), VL, VT,
- DL, DAG, Subtarget);
+ SDValue(), DAG.getConstant(StepValImm, DL, VT.getVectorElementType()),
+ VL, VT, DL, DAG, Subtarget);
StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
}
}
@@ -5353,22 +5641,26 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
MVT VecVT = Op.getSimpleValueType();
+ if (VecVT.getVectorElementType() == MVT::i1) {
+ MVT WidenVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
+ SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenVT, Op.getOperand(0));
+ SDValue Op2 = DAG.getNode(ISD::VECTOR_REVERSE, DL, WidenVT, Op1);
+ return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Op2);
+ }
unsigned EltSize = VecVT.getScalarSizeInBits();
unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
-
- unsigned MaxVLMAX = 0;
- unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
- if (VectorBitsMax != 0)
- MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
+ unsigned VectorBitsMax = Subtarget.getRealMaxVLen();
+ unsigned MaxVLMAX =
+ RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
MVT IntVT = VecVT.changeVectorElementTypeToInteger();
- // If this is SEW=8 and VLMAX is unknown or more than 256, we need
+ // If this is SEW=8 and VLMAX is potentially more than 256, we need
// to use vrgatherei16.vv.
// TODO: It's also possible to use vrgatherei16.vv for other types to
// decrease register width for the index calculation.
- if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
+ if (MaxVLMAX > 256 && EltSize == 8) {
    // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
// Reverse each half, then reassemble them in reverse order.
// NOTE: It's also possible that after splitting that VLMAX no longer
@@ -5413,13 +5705,51 @@ SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
if (!IsRV32E64)
SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
else
- SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
+ SplatVL = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT, DAG.getUNDEF(IntVT),
+ VLMinus1, DAG.getRegister(RISCV::X0, XLenVT));
SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
SDValue Indices =
DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
- return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
+ return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask,
+ DAG.getUNDEF(VecVT), VL);
+}
+
+SDValue RISCVTargetLowering::lowerVECTOR_SPLICE(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ SDValue V1 = Op.getOperand(0);
+ SDValue V2 = Op.getOperand(1);
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT VecVT = Op.getSimpleValueType();
+
+ unsigned MinElts = VecVT.getVectorMinNumElements();
+ SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
+ DAG.getConstant(MinElts, DL, XLenVT));
+
+ int64_t ImmValue = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
+ SDValue DownOffset, UpOffset;
+ if (ImmValue >= 0) {
+    // The operand is a TargetConstant; we need to rebuild it as a regular
+    // constant.
+ DownOffset = DAG.getConstant(ImmValue, DL, XLenVT);
+ UpOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DownOffset);
+ } else {
+    // The operand is a TargetConstant; we need to rebuild it as a regular
+    // constant rather than negating the original operand.
+ UpOffset = DAG.getConstant(-ImmValue, DL, XLenVT);
+ DownOffset = DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, UpOffset);
+ }
+
+ SDValue TrueMask = getAllOnesMask(VecVT, VLMax, DL, DAG);
+
+ SDValue SlideDown =
+ DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, VecVT, DAG.getUNDEF(VecVT), V1,
+ DownOffset, TrueMask, UpOffset);
+ return DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, VecVT, SlideDown, V2, UpOffset,
+ TrueMask,
+ DAG.getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT));
}
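The offset arithmetic above in scalar form (sketch; the struct and helper are invented for illustration):

#include <cstdint>

// A splice by Imm slides V1 down by Down lanes, then slides V2 up starting
// at lane Up = VLMax - Down. A negative Imm counts lanes back from the end
// of V1 instead.
struct SpliceOffsets {
  int64_t Down, Up;
};
static SpliceOffsets spliceOffsets(int64_t VLMax, int64_t Imm) {
  if (Imm >= 0)
    return {Imm, VLMax - Imm};
  return {VLMax + Imm, -Imm};
}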
SDValue
@@ -5434,18 +5764,26 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
"Expecting a correctly-aligned load");
MVT VT = Op.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL =
- DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
+ bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
+ SDValue IntID = DAG.getTargetConstant(
+ IsMaskOp ? Intrinsic::riscv_vlm : Intrinsic::riscv_vle, DL, XLenVT);
+ SmallVector<SDValue, 4> Ops{Load->getChain(), IntID};
+ if (!IsMaskOp)
+ Ops.push_back(DAG.getUNDEF(ContainerVT));
+ Ops.push_back(Load->getBasePtr());
+ Ops.push_back(VL);
SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
- SDValue NewLoad = DAG.getMemIntrinsicNode(
- RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
- Load->getMemoryVT(), Load->getMemOperand());
+ SDValue NewLoad =
+ DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
+ Load->getMemoryVT(), Load->getMemOperand());
SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
- return DAG.getMergeValues({Result, Load->getChain()}, DL);
+ return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
}
SDValue
@@ -5461,6 +5799,7 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
SDValue StoreVal = Store->getValue();
MVT VT = StoreVal.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
  // If the size is less than a byte, we need to pad with zeros to make a byte.
if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
@@ -5472,14 +5811,17 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VL =
- DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
+ SDValue VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
SDValue NewValue =
convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
+
+ bool IsMaskOp = VT.getVectorElementType() == MVT::i1;
+ SDValue IntID = DAG.getTargetConstant(
+ IsMaskOp ? Intrinsic::riscv_vsm : Intrinsic::riscv_vse, DL, XLenVT);
return DAG.getMemIntrinsicNode(
- RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
- {Store->getChain(), NewValue, Store->getBasePtr(), VL},
+ ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
+ {Store->getChain(), IntID, NewValue, Store->getBasePtr(), VL},
Store->getMemoryVT(), Store->getMemOperand());
}
@@ -5514,8 +5856,7 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
ContainerVT = getContainerForFixedLengthVector(VT);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
@@ -5581,8 +5922,7 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
@@ -5620,8 +5960,8 @@ RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
SDValue VL =
DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
+ SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
Op.getOperand(2), Mask, VL);
@@ -5667,9 +6007,9 @@ SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
SDValue Mask, VL;
std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
- SDValue SplatZero =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
- DAG.getConstant(0, DL, Subtarget.getXLenVT()));
+ SDValue SplatZero = DAG.getNode(
+ RISCVISD::VMV_V_X_VL, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
+ DAG.getConstant(0, DL, Subtarget.getXLenVT()));
SDValue NegX =
DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
SDValue Max =
@@ -5787,15 +6127,260 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
}
if (!VT.isFixedLengthVector())
- return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
+ return DAG.getNode(RISCVISDOpc, DL, VT, Ops, Op->getFlags());
MVT ContainerVT = getContainerForFixedLengthVector(VT);
- SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
+ SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops, Op->getFlags());
return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
}
+SDValue RISCVTargetLowering::lowerVPExtMaskOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ SDValue Src = Op.getOperand(0);
+ // NOTE: Mask is dropped.
+ SDValue VL = Op.getOperand(2);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ MVT SrcVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
+ }
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+ SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), Zero, VL);
+
+ SDValue SplatValue = DAG.getConstant(
+ Op.getOpcode() == ISD::VP_ZERO_EXTEND ? 1 : -1, DL, XLenVT);
+ SDValue Splat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), SplatValue, VL);
+
+ SDValue Result = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, Src,
+ Splat, ZeroSplat, VL);
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
+
+SDValue RISCVTargetLowering::lowerVPSetCCMaskOp(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ MVT VT = Op.getSimpleValueType();
+
+ SDValue Op1 = Op.getOperand(0);
+ SDValue Op2 = Op.getOperand(1);
+ ISD::CondCode Condition = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ // NOTE: Mask is dropped.
+ SDValue VL = Op.getOperand(4);
+
+ MVT ContainerVT = VT;
+ if (VT.isFixedLengthVector()) {
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget);
+ Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget);
+ }
+
+ SDValue Result;
+ SDValue AllOneMask = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
+
+ switch (Condition) {
+ default:
+ break;
+ // X != Y --> (X^Y)
+ case ISD::SETNE:
+ Result = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
+ break;
+ // X == Y --> ~(X^Y)
+ case ISD::SETEQ: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, Op2, VL);
+ Result =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Temp, AllOneMask, VL);
+ break;
+ }
+ // X >s Y --> X == 0 & Y == 1 --> ~X & Y
+ // X <u Y --> X == 0 & Y == 1 --> ~X & Y
+ case ISD::SETGT:
+ case ISD::SETULT: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
+ Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Temp, Op2, VL);
+ break;
+ }
+ // X <s Y --> X == 1 & Y == 0 --> ~Y & X
+ // X >u Y --> X == 1 & Y == 0 --> ~Y & X
+ case ISD::SETLT:
+ case ISD::SETUGT: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
+ Result = DAG.getNode(RISCVISD::VMAND_VL, DL, ContainerVT, Op1, Temp, VL);
+ break;
+ }
+ // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
+ // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
+ case ISD::SETGE:
+ case ISD::SETULE: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op1, AllOneMask, VL);
+    Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op2, VL);
+ break;
+ }
+ // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
+ // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
+ case ISD::SETLE:
+ case ISD::SETUGE: {
+ SDValue Temp =
+ DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Op2, AllOneMask, VL);
+    Result = DAG.getNode(RISCVISD::VMOR_VL, DL, ContainerVT, Temp, Op1, VL);
+ break;
+ }
+ }
+
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
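The boolean identities in the comments above can be checked exhaustively. A self-contained sanity test (not part of the patch):

#include <cassert>

// A set i1 lane compares as -1 when signed and as 1 when unsigned, which is
// why each signed predicate pairs with the opposite unsigned one above.
static void checkMaskSetccIdentities() {
  for (bool X : {false, true})
    for (bool Y : {false, true}) {
      int SX = X ? -1 : 0, SY = Y ? -1 : 0; // signed lane values
      unsigned UX = X, UY = Y;              // unsigned lane values
      assert((X != Y) == (X ^ Y));          // SETNE:  x ^ y
      assert((X == Y) == !(X ^ Y));         // SETEQ:  ~(x ^ y)
      assert((SX > SY) == (!X && Y));       // SETGT:  ~x & y
      assert((UX < UY) == (!X && Y));       // SETULT: ~x & y
      assert((SX >= SY) == (!X || Y));      // SETGE:  ~x | y
      assert((UX <= UY) == (!X || Y));      // SETULE: ~x | y
    }
}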
+
+// Lower Floating-Point/Integer Type-Convert VP SDNodes
+SDValue RISCVTargetLowering::lowerVPFPIntConvOp(SDValue Op, SelectionDAG &DAG,
+ unsigned RISCVISDOpc) const {
+ SDLoc DL(Op);
+
+ SDValue Src = Op.getOperand(0);
+ SDValue Mask = Op.getOperand(1);
+ SDValue VL = Op.getOperand(2);
+
+ MVT DstVT = Op.getSimpleValueType();
+ MVT SrcVT = Src.getSimpleValueType();
+ if (DstVT.isFixedLengthVector()) {
+ DstVT = getContainerForFixedLengthVector(DstVT);
+ SrcVT = getContainerForFixedLengthVector(SrcVT);
+ Src = convertToScalableVector(SrcVT, Src, DAG, Subtarget);
+ MVT MaskVT = getMaskTypeFor(DstVT);
+ Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+ }
+
+ unsigned RISCVISDExtOpc = (RISCVISDOpc == RISCVISD::SINT_TO_FP_VL ||
+ RISCVISDOpc == RISCVISD::FP_TO_SINT_VL)
+ ? RISCVISD::VSEXT_VL
+ : RISCVISD::VZEXT_VL;
+
+ unsigned DstEltSize = DstVT.getScalarSizeInBits();
+ unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
+
+ SDValue Result;
+ if (DstEltSize >= SrcEltSize) { // Single-width and widening conversion.
+ if (SrcVT.isInteger()) {
+ assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
+
+ // Do we need to do any pre-widening before converting?
+ if (SrcEltSize == 1) {
+ MVT IntVT = DstVT.changeVectorElementTypeToInteger();
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue Zero = DAG.getConstant(0, DL, XLenVT);
+ SDValue ZeroSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
+ DAG.getUNDEF(IntVT), Zero, VL);
+ SDValue One = DAG.getConstant(
+ RISCVISDExtOpc == RISCVISD::VZEXT_VL ? 1 : -1, DL, XLenVT);
+ SDValue OneSplat = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntVT,
+ DAG.getUNDEF(IntVT), One, VL);
+ Src = DAG.getNode(RISCVISD::VSELECT_VL, DL, IntVT, Src, OneSplat,
+ ZeroSplat, VL);
+ } else if (DstEltSize > (2 * SrcEltSize)) {
+ // Widen before converting.
+ MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
+ DstVT.getVectorElementCount());
+ Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
+ }
+
+ Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
+ } else {
+ assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
+ "Wrong input/output vector types");
+
+ // Convert f16 to f32 then convert f32 to i64.
+ if (DstEltSize > (2 * SrcEltSize)) {
+ assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
+ MVT InterimFVT =
+ MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
+ Src =
+ DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
+ }
+
+ Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
+ }
+ } else { // Narrowing + Conversion
+ if (SrcVT.isInteger()) {
+ assert(DstVT.isFloatingPoint() && "Wrong input/output vector types");
+      // First do a narrowing conversion to an FP type half the size, then
+      // round to a smaller FP type if needed.
+
+ MVT InterimFVT = DstVT;
+ if (SrcEltSize > (2 * DstEltSize)) {
+ assert(SrcEltSize == (4 * DstEltSize) && "Unexpected types!");
+ assert(DstVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
+ InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
+ }
+
+ Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
+
+ if (InterimFVT != DstVT) {
+ Src = Result;
+ Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
+ }
+ } else {
+ assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
+ "Wrong input/output vector types");
+ // First do a narrowing conversion to an integer half the size, then
+ // truncate if needed.
+
+ if (DstEltSize == 1) {
+ // First convert to the same size integer, then convert to mask using
+ // setcc.
+ assert(SrcEltSize >= 16 && "Unexpected FP type!");
+ MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize),
+ DstVT.getVectorElementCount());
+ Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
+
+ // Compare the integer result to 0. The integer should be 0 or 1/-1,
+ // otherwise the conversion was undefined.
+ MVT XLenVT = Subtarget.getXLenVT();
+ SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
+ SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
+                                DAG.getUNDEF(InterimIVT), SplatZero, VL);
+ Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero,
+ DAG.getCondCode(ISD::SETNE), Mask, VL);
+ } else {
+ MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+ DstVT.getVectorElementCount());
+
+ Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
+
+ while (InterimIVT != DstVT) {
+ SrcEltSize /= 2;
+ Src = Result;
+ InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+ DstVT.getVectorElementCount());
+ Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
+ Src, Mask, VL);
+ }
+ }
+ }
+ }
+
+ MVT VT = Op.getSimpleValueType();
+ if (!VT.isFixedLengthVector())
+ return Result;
+ return convertFromScalableVector(VT, Result, DAG, Subtarget);
+}
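A rough scalar classification of the branches above (illustrative only; the exact path also depends on the int-to-FP direction):

// The conversion is a single vfcvt/vfwcvt/vfncvt step when the element
// widths are within 2x of each other; wider gaps go through intermediate
// types one 2x step at a time, as in the code above.
static bool isSingleStepConv(unsigned SrcEltBits, unsigned DstEltBits) {
  unsigned Wide = SrcEltBits > DstEltBits ? SrcEltBits : DstEltBits;
  unsigned Narrow = SrcEltBits < DstEltBits ? SrcEltBits : DstEltBits;
  return Wide <= 2 * Narrow;
}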
+
SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG,
unsigned MaskOpc,
unsigned VecOpc) const {
@@ -5876,23 +6461,14 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
- // We need to use the larger of the result and index type to determine the
- // scalable type to use so we don't increase LMUL for any operand/result.
- if (VT.bitsGE(IndexVT)) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
- ContainerVT.getVectorElementCount());
- } else {
- IndexVT = getContainerForFixedLengthVector(IndexVT);
- ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
- IndexVT.getVectorElementCount());
- }
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
+ ContainerVT.getVectorElementCount());
Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
}
@@ -5987,24 +6563,15 @@ SDValue RISCVTargetLowering::lowerMaskedScatter(SDValue Op,
MVT ContainerVT = VT;
if (VT.isFixedLengthVector()) {
- // We need to use the larger of the value and index type to determine the
- // scalable type to use so we don't increase LMUL for any operand/result.
- if (VT.bitsGE(IndexVT)) {
- ContainerVT = getContainerForFixedLengthVector(VT);
- IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
- ContainerVT.getVectorElementCount());
- } else {
- IndexVT = getContainerForFixedLengthVector(IndexVT);
- ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
- IndexVT.getVectorElementCount());
- }
+ ContainerVT = getContainerForFixedLengthVector(VT);
+ IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
+ ContainerVT.getVectorElementCount());
Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
if (!IsUnmasked) {
- MVT MaskVT =
- MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(ContainerVT);
Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
}
}
@@ -6095,14 +6662,21 @@ SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
RMValue);
}
+SDValue RISCVTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
+ SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ bool isRISCV64 = Subtarget.is64Bit();
+ EVT PtrVT = getPointerTy(DAG.getDataLayout());
+
+ int FI = MF.getFrameInfo().CreateFixedObject(isRISCV64 ? 8 : 4, 0, false);
+ return DAG.getFrameIndex(FI, PtrVT);
+}
+
static RISCVISD::NodeType getRISCVWOpcodeByIntr(unsigned IntNo) {
switch (IntNo) {
default:
llvm_unreachable("Unexpected Intrinsic");
- case Intrinsic::riscv_grev:
- return RISCVISD::GREVW;
- case Intrinsic::riscv_gorc:
- return RISCVISD::GORCW;
case Intrinsic::riscv_bcompress:
return RISCVISD::BCOMPRESSW;
case Intrinsic::riscv_bdecompress:
@@ -6121,9 +6695,12 @@ static SDValue customLegalizeToWOpByIntr(SDNode *N, SelectionDAG &DAG,
unsigned IntNo) {
SDLoc DL(N);
RISCVISD::NodeType WOpcode = getRISCVWOpcodeByIntr(IntNo);
- SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewOp2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
- SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp1, NewOp2);
+  // Deal with the instruction operands.
+ SmallVector<SDValue, 3> NewOps;
+ for (SDValue Op : drop_begin(N->ops()))
+    // Promote each operand to i64.
+ NewOps.push_back(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op));
+ SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOps);
// ReplaceNodeResults requires we maintain the same type for the return value.
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
}
@@ -6150,10 +6727,6 @@ static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
return RISCVISD::ROLW;
case ISD::ROTR:
return RISCVISD::RORW;
- case RISCVISD::GREV:
- return RISCVISD::GREVW;
- case RISCVISD::GORC:
- return RISCVISD::GORCW;
}
}
@@ -6309,6 +6882,10 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (N->getOperand(1).getOpcode() != ISD::Constant) {
+ // If we can use a BSET instruction, allow default promotion to apply.
+ if (N->getOpcode() == ISD::SHL && Subtarget.hasStdExtZbs() &&
+ isOneConstant(N->getOperand(0)))
+ break;
Results.push_back(customLegalizeToWOp(N, DAG));
break;
}
@@ -6388,12 +6965,23 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
DAG.getValueType(MVT::i32));
- // Sign extend the LHS and perform an unsigned compare with the ADDW result.
- // Since the inputs are sign extended from i32, this is equivalent to
- // comparing the lower 32 bits.
- LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
- SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
- IsAdd ? ISD::SETULT : ISD::SETUGT);
+ SDValue Overflow;
+ if (IsAdd && isOneConstant(RHS)) {
+ // Special case uaddo X, 1 overflowed if the addition result is 0.
+      // Special case: uaddo X, 1 overflows iff the addition result is 0.
+      // The general case (X + C) < C is not necessarily beneficial. Although
+      // we reduce the live range of X, we may introduce the materialization
+      // of constant C, especially when the setcc result is used by a branch.
+      // We have no instructions that compare with a constant and branch.
+ DAG.getConstant(0, DL, MVT::i64), ISD::SETEQ);
+ } else {
+ // Sign extend the LHS and perform an unsigned compare with the ADDW
+ // result. Since the inputs are sign extended from i32, this is equivalent
+ // to comparing the lower 32 bits.
+ LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
+ IsAdd ? ISD::SETULT : ISD::SETUGT);
+ }
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
Results.push_back(Overflow);
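The special case above in scalar terms (standalone check; the helper name is invented):

#include <cstdint>

// uaddo(X, 1) overflows exactly when the 32-bit sum wraps to 0, so the
// overflow flag needs no materialized constant and no second live value.
static bool uaddoWithOneOverflows(uint32_t X) {
  return static_cast<uint32_t>(X + 1) == 0; // only X == UINT32_MAX
}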
@@ -6421,6 +7009,33 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(expandAddSubSat(N, DAG));
return;
}
+ case ISD::ABS: {
+ assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ "Unexpected custom legalisation");
+
+ // Expand abs to Y = (sraiw X, 31); subw(xor(X, Y), Y)
+
+ SDValue Src = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+
+    // Freeze the source so we can increase its use count.
+ Src = DAG.getFreeze(Src);
+
+ // Copy sign bit to all bits using the sraiw pattern.
+ SDValue SignFill = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Src,
+ DAG.getValueType(MVT::i32));
+ SignFill = DAG.getNode(ISD::SRA, DL, MVT::i64, SignFill,
+ DAG.getConstant(31, DL, MVT::i64));
+
+ SDValue NewRes = DAG.getNode(ISD::XOR, DL, MVT::i64, Src, SignFill);
+ NewRes = DAG.getNode(ISD::SUB, DL, MVT::i64, NewRes, SignFill);
+
+ // NOTE: The result is only required to be anyextended, but sext is
+ // consistent with type legalization of sub.
+ NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewRes,
+ DAG.getValueType(MVT::i32));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+ return;
+ }
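The expansion above is the classic branch-free abs. A standalone check of the identity (sketch; arithmetic done in unsigned to model the wrapping subw):

#include <cstdint>

// With Y = X >> 31 (arithmetic), Y is 0 for non-negative X and -1 otherwise,
// so (X ^ Y) - Y == |X|, with INT32_MIN wrapping to itself as ISD::ABS does.
static int32_t absViaSignFill(int32_t X) {
  int32_t Y = X >> 31; // the "sraiw X, 31" sign fill: all zeros or all ones
  uint32_t R = (uint32_t(X) ^ uint32_t(Y)) - uint32_t(Y);
  return int32_t(R);   // xor conditionally complements, sub adds the 1
}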
case ISD::BITCAST: {
EVT VT = N->getValueType(0);
assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
@@ -6451,37 +7066,24 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
break;
}
case RISCVISD::GREV:
- case RISCVISD::GORC: {
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
- "Unexpected custom legalisation");
- assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
- // This is similar to customLegalizeToWOp, except that we pass the second
- // operand (a TargetConstant) straight through: it is already of type
- // XLenVT.
- RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
- SDValue NewOp0 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
- SDValue NewOp1 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
- // ReplaceNodeResults requires we maintain the same type for the return
- // value.
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
- break;
- }
+ case RISCVISD::GORC:
case RISCVISD::SHFL: {
- // There is no SHFLIW instruction, but we can just promote the operation.
- assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
+ MVT VT = N->getSimpleValueType(0);
+ MVT XLenVT = Subtarget.getXLenVT();
+ assert((VT == MVT::i16 || (VT == MVT::i32 && Subtarget.is64Bit())) &&
"Unexpected custom legalisation");
assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
- SDValue NewOp0 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
+ assert((Subtarget.hasStdExtZbp() ||
+ (Subtarget.hasStdExtZbkb() && N->getOpcode() == RISCVISD::GREV &&
+ N->getConstantOperandVal(1) == 7)) &&
+ "Unexpected extension");
+ SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, N->getOperand(0));
SDValue NewOp1 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
+ DAG.getNode(ISD::ZERO_EXTEND, DL, XLenVT, N->getOperand(1));
+ SDValue NewRes = DAG.getNode(N->getOpcode(), DL, XLenVT, NewOp0, NewOp1);
// ReplaceNodeResults requires we maintain the same type for the return
// value.
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
break;
}
case ISD::BSWAP:
@@ -6496,9 +7098,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
if (N->getOpcode() == ISD::BSWAP)
Imm &= ~0x7U;
- unsigned Opc = Subtarget.is64Bit() ? RISCVISD::GREVW : RISCVISD::GREV;
- SDValue GREVI =
- DAG.getNode(Opc, DL, XLenVT, NewOp0, DAG.getConstant(Imm, DL, XLenVT));
+ SDValue GREVI = DAG.getNode(RISCVISD::GREV, DL, XLenVT, NewOp0,
+ DAG.getConstant(Imm, DL, XLenVT));
// ReplaceNodeResults requires we maintain the same type for the return
// value.
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, GREVI));
@@ -6564,9 +7165,8 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
MVT XLenVT = Subtarget.getXLenVT();
// Use a VL of 1 to avoid processing more elements than we need.
- MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
SDValue VL = DAG.getConstant(1, DL, XLenVT);
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+ SDValue Mask = getAllOnesMask(ContainerVT, VL, DL, DAG);
// Unless the index is known to be 0, we must slide the vector down to get
// the desired element into index 0.
@@ -6581,6 +7181,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// To extract the upper XLEN bits of the vector element, shift the first
// element right by 32 bits and re-extract the lower XLEN bits.
SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT),
DAG.getConstant(32, DL, XLenVT), VL);
SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
ThirtyTwoV, Mask, VL);
@@ -6597,38 +7198,42 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
llvm_unreachable(
"Don't know how to custom type legalize this intrinsic!");
case Intrinsic::riscv_grev:
- case Intrinsic::riscv_gorc:
- case Intrinsic::riscv_bcompress:
- case Intrinsic::riscv_bdecompress:
- case Intrinsic::riscv_bfp: {
+ case Intrinsic::riscv_gorc: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
+ SDValue NewOp1 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
+ SDValue NewOp2 =
+ DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
+ unsigned Opc =
+ IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
+ // If the control is a constant, promote the node by clearing any extra
+      // bits in the control. isel will form greviw/gorciw if the result is
+ // sign extended.
+ if (isa<ConstantSDNode>(NewOp2)) {
+ NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
+ DAG.getConstant(0x1f, DL, MVT::i64));
+ Opc = IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
+ }
+ SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
break;
}
+ case Intrinsic::riscv_bcompress:
+ case Intrinsic::riscv_bdecompress:
+ case Intrinsic::riscv_bfp:
case Intrinsic::riscv_fsl:
case Intrinsic::riscv_fsr: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- SDValue NewOp1 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- SDValue NewOp2 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
- SDValue NewOp3 =
- DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(3));
- unsigned Opc = getRISCVWOpcodeByIntr(IntNo);
- SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2, NewOp3);
- Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
+ Results.push_back(customLegalizeToWOpByIntr(N, DAG, IntNo));
break;
}
case Intrinsic::riscv_orc_b: {
// Lower to the GORCI encoding for orc.b with the operand extended.
SDValue NewOp =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
- // If Zbp is enabled, use GORCIW which will sign extend the result.
- unsigned Opc =
- Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
- SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
+ SDValue Res = DAG.getNode(RISCVISD::GORC, DL, MVT::i64, NewOp,
DAG.getConstant(7, DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -6681,10 +7286,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
// To extract the upper XLEN bits of the vector element, shift the first
// element right by 32 bits and re-extract the lower XLEN bits.
SDValue VL = DAG.getConstant(1, DL, XLenVT);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
- SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
- SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
- DAG.getConstant(32, DL, XLenVT), VL);
+ SDValue Mask = getAllOnesMask(VecVT, VL, DL, DAG);
+
+ SDValue ThirtyTwoV =
+ DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT, DAG.getUNDEF(VecVT),
+ DAG.getConstant(32, DL, XLenVT), VL);
SDValue LShr32 =
DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);
@@ -6840,6 +7446,110 @@ static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
return matchRISCVBitmanipPat(Op, BitmanipMasks);
}
+// Try to fold (<bop> x, (reduction.<bop> vec, start))
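+// by folding x into the start value when start is the neutral element.
+// Illustrative example (not from the original patch):
+//   (add X, (extractelt (vecreduce_add V, /*start=*/0), 0))
+//     -> (extractelt (vecreduce_add V, /*start=*/X), 0)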
+static SDValue combineBinOpToReduce(SDNode *N, SelectionDAG &DAG) {
+ auto BinOpToRVVReduce = [](unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled binary to transform reduction");
+ case ISD::ADD:
+ return RISCVISD::VECREDUCE_ADD_VL;
+ case ISD::UMAX:
+ return RISCVISD::VECREDUCE_UMAX_VL;
+ case ISD::SMAX:
+ return RISCVISD::VECREDUCE_SMAX_VL;
+ case ISD::UMIN:
+ return RISCVISD::VECREDUCE_UMIN_VL;
+ case ISD::SMIN:
+ return RISCVISD::VECREDUCE_SMIN_VL;
+ case ISD::AND:
+ return RISCVISD::VECREDUCE_AND_VL;
+ case ISD::OR:
+ return RISCVISD::VECREDUCE_OR_VL;
+ case ISD::XOR:
+ return RISCVISD::VECREDUCE_XOR_VL;
+ case ISD::FADD:
+ return RISCVISD::VECREDUCE_FADD_VL;
+ case ISD::FMAXNUM:
+ return RISCVISD::VECREDUCE_FMAX_VL;
+ case ISD::FMINNUM:
+ return RISCVISD::VECREDUCE_FMIN_VL;
+ }
+ };
+
+ auto IsReduction = [&BinOpToRVVReduce](SDValue V, unsigned Opc) {
+ return V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isNullConstant(V.getOperand(1)) &&
+ V.getOperand(0).getOpcode() == BinOpToRVVReduce(Opc);
+ };
+
+ unsigned Opc = N->getOpcode();
+ unsigned ReduceIdx;
+ if (IsReduction(N->getOperand(0), Opc))
+ ReduceIdx = 0;
+ else if (IsReduction(N->getOperand(1), Opc))
+ ReduceIdx = 1;
+ else
+ return SDValue();
+
+ // Skip if FADD disallows reassociation but the combine requires it.
+ if (Opc == ISD::FADD && !N->getFlags().hasAllowReassociation())
+ return SDValue();
+
+ SDValue Extract = N->getOperand(ReduceIdx);
+ SDValue Reduce = Extract.getOperand(0);
+ if (!Reduce.hasOneUse())
+ return SDValue();
+
+ SDValue ScalarV = Reduce.getOperand(2);
+
+ // Make sure that ScalarV is a splat with VL=1.
+ if (ScalarV.getOpcode() != RISCVISD::VFMV_S_F_VL &&
+ ScalarV.getOpcode() != RISCVISD::VMV_S_X_VL &&
+ ScalarV.getOpcode() != RISCVISD::VMV_V_X_VL)
+ return SDValue();
+
+ if (!isOneConstant(ScalarV.getOperand(2)))
+ return SDValue();
+
+ // TODO: Deal with values other than the neutral element.
+ auto IsRVVNeutralElement = [Opc, &DAG](SDNode *N, SDValue V) {
+ if (Opc == ISD::FADD && N->getFlags().hasNoSignedZeros() &&
+ isNullFPConstant(V))
+ return true;
+ return DAG.getNeutralElement(Opc, SDLoc(V), V.getSimpleValueType(),
+ N->getFlags()) == V;
+ };
+
+ // Check that the scalar operand of ScalarV is the neutral element.
+ if (!IsRVVNeutralElement(N, ScalarV.getOperand(1)))
+ return SDValue();
+
+ if (!ScalarV.hasOneUse())
+ return SDValue();
+
+ EVT SplatVT = ScalarV.getValueType();
+ SDValue NewStart = N->getOperand(1 - ReduceIdx);
+ unsigned SplatOpc = RISCVISD::VFMV_S_F_VL;
+ if (SplatVT.isInteger()) {
+ auto *C = dyn_cast<ConstantSDNode>(NewStart.getNode());
+ if (!C || C->isZero() || !isInt<5>(C->getSExtValue()))
+ SplatOpc = RISCVISD::VMV_S_X_VL;
+ else
+ SplatOpc = RISCVISD::VMV_V_X_VL;
+ }
+
+ SDValue NewScalarV =
+ DAG.getNode(SplatOpc, SDLoc(N), SplatVT, ScalarV.getOperand(0), NewStart,
+ ScalarV.getOperand(2));
+ SDValue NewReduce =
+ DAG.getNode(Reduce.getOpcode(), SDLoc(Reduce), Reduce.getValueType(),
+ Reduce.getOperand(0), Reduce.getOperand(1), NewScalarV,
+ Reduce.getOperand(3), Reduce.getOperand(4));
+ return DAG.getNode(Extract.getOpcode(), SDLoc(Extract),
+ Extract.getValueType(), NewReduce, Extract.getOperand(1));
+}
+
// Match the following pattern as a GREVI(W) operation
// (or (BITMANIP_SHL x), (BITMANIP_SRL x))
static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
@@ -7066,11 +7776,70 @@ static SDValue transformAddShlImm(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SHL, DL, VT, NA1, DAG.getConstant(Bits, DL, VT));
}
+// Combine
+// ROTR ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
+// ROTL ((GREVI x, 24), 16) -> (GREVI x, 8) for RV32
+// ROTR ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
+// ROTL ((GREVI x, 56), 32) -> (GREVI x, 24) for RV64
+// RORW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
+// ROLW ((GREVI x, 24), 16) -> (GREVIW x, 8) for RV64
+// The grev patterns represent BSWAP.
+// FIXME: This can be generalized to any GREV. We just need to toggle the MSB
+// of the grev shift amount.
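+// Illustrative example (not from the original patch): on RV32,
+// (rotr (grev X, 24), 16) combines to (grev X, 24 ^ 16) == (grev X, 8);
+// a full byte swap followed by a halfword rotate is the same as swapping
+// the bytes within each halfword.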
+static SDValue combineROTR_ROTL_RORW_ROLW(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ bool IsWInstruction =
+ N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW;
+ assert((N->getOpcode() == ISD::ROTR || N->getOpcode() == ISD::ROTL ||
+ IsWInstruction) &&
+ "Unexpected opcode!");
+ SDValue Src = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ if (!Subtarget.hasStdExtZbp() || Src.getOpcode() != RISCVISD::GREV)
+ return SDValue();
+
+ if (!isa<ConstantSDNode>(N->getOperand(1)) ||
+ !isa<ConstantSDNode>(Src.getOperand(1)))
+ return SDValue();
+
+ unsigned BitWidth = IsWInstruction ? 32 : VT.getSizeInBits();
+ assert(isPowerOf2_32(BitWidth) && "Expected a power of 2");
+
+ // Needs to be a rotate by half the bitwidth for ROTR/ROTL or by 16 for
+ // RORW/ROLW, and the grev should be the bswap encoding for this width.
+ unsigned ShAmt1 = N->getConstantOperandVal(1);
+ unsigned ShAmt2 = Src.getConstantOperandVal(1);
+ if (BitWidth < 32 || ShAmt1 != (BitWidth / 2) || ShAmt2 != (BitWidth - 8))
+ return SDValue();
+
+ Src = Src.getOperand(0);
+
+ // Toggle the MSB of the grev shift amount.
+ unsigned CombinedShAmt = ShAmt1 ^ ShAmt2;
+ if (CombinedShAmt == 0)
+ return Src;
+
+ SDValue Res = DAG.getNode(
+ RISCVISD::GREV, DL, VT, Src,
+ DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
+ if (!IsWInstruction)
+ return Res;
+
+ // Sign extend the result to match the behavior of the rotate. This will be
+ // selected to GREVIW in isel.
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Res,
+ DAG.getValueType(MVT::i32));
+}
+
// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). Repeated stage does
// not undo itself, but they are redundant.
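+// Illustrative example (not from the original patch):
+//   (GREVI (GREVI x, 24), 8) -> (GREVI x, 24 ^ 8) == (GREVI x, 16)
+//   (GORCI (GORCI x, 16), 5) -> (GORCI x, 16 | 5) == (GORCI x, 21)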
static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
+ bool IsGORC = N->getOpcode() == RISCVISD::GORC;
+ assert((IsGORC || N->getOpcode() == RISCVISD::GREV) && "Unexpected opcode");
SDValue Src = N->getOperand(0);
if (Src.getOpcode() != N->getOpcode())
@@ -7085,7 +7854,7 @@ static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
Src = Src.getOperand(0);
unsigned CombinedShAmt;
- if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
+ if (IsGORC)
CombinedShAmt = ShAmt1 | ShAmt2;
else
CombinedShAmt = ShAmt1 ^ ShAmt2;
@@ -7203,6 +7972,11 @@ static SDValue transformAddImmMulImm(SDNode *N, SelectionDAG &DAG,
auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!N0C || !N1C)
return SDValue();
+ // If N0C has multiple uses it's possible one of the cases in
+ // DAGCombiner::isMulAddWithConstProfitable will be true, which would result
+ // in an infinite loop.
+ if (!N0C->hasOneUse())
+ return SDValue();
int64_t C0 = N0C->getSExtValue();
int64_t C1 = N1C->getSExtValue();
int64_t CA, CB;
@@ -7238,6 +8012,8 @@ static SDValue performADDCombine(SDNode *N, SelectionDAG &DAG,
return V;
if (SDValue V = transformAddShlImm(N, DAG, Subtarget))
return V;
+ if (SDValue V = combineBinOpToReduce(N, DAG))
+ return V;
// fold (add (select lhs, rhs, cc, 0, y), x) ->
// (select lhs, rhs, cc, x, (add x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
@@ -7251,7 +8027,30 @@ static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG) {
return combineSelectAndUse(N, N1, N0, DAG, /*AllOnes*/ false);
}
-static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ // Pre-promote (i32 (and (srl X, Y), 1)) on RV64 with Zbs without zero
+ // extending X. This is safe since we only need the LSB after the shift and
+ // shift amounts larger than 31 would produce poison. If we wait until
+ // type legalization, we'll create RISCVISD::SRLW and we can't recover it
+ // to use a BEXT instruction.
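+ // Illustrative example (not from the original patch):
+ //   (i32 (and (srl X, Y), 1))
+ //     -> (trunc (i64 (and (srl (anyext X), (zext Y)), 1)))
+ // which isel can select as bext.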
+ if (Subtarget.is64Bit() && Subtarget.hasStdExtZbs() &&
+ N->getValueType(0) == MVT::i32 && isOneConstant(N->getOperand(1)) &&
+ N0.getOpcode() == ISD::SRL && !isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.hasOneUse()) {
+ SDLoc DL(N);
+ SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
+ SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
+ SDValue Srl = DAG.getNode(ISD::SRL, DL, MVT::i64, Op0, Op1);
+ SDValue And = DAG.getNode(ISD::AND, DL, MVT::i64, Srl,
+ DAG.getConstant(1, DL, MVT::i64));
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
+ }
+
+ if (SDValue V = combineBinOpToReduce(N, DAG))
+ return V;
+
// fold (and (select lhs, rhs, cc, -1, y), x) ->
// (select lhs, rhs, cc, x, (and x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ true);
@@ -7268,99 +8067,197 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
return SHFL;
}
+ if (SDValue V = combineBinOpToReduce(N, DAG))
+ return V;
// fold (or (select cond, 0, y), x) ->
// (select cond, x, (or x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
+ // NOTE: Assumes ROL being legal means ROLW is legal.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (N0.getOpcode() == RISCVISD::SLLW &&
+ isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0)) &&
+ TLI.isOperationLegal(ISD::ROTL, MVT::i64)) {
+ SDLoc DL(N);
+ return DAG.getNode(RISCVISD::ROLW, DL, MVT::i64,
+ DAG.getConstant(~1, DL, MVT::i64), N0.getOperand(1));
+ }
+
+ if (SDValue V = combineBinOpToReduce(N, DAG))
+ return V;
// fold (xor (select cond, 0, y), x) ->
// (select cond, x, (xor x, y))
return combineSelectAndUseCommutative(N, DAG, /*AllOnes*/ false);
}
-// Attempt to turn ANY_EXTEND into SIGN_EXTEND if the input to the ANY_EXTEND
-// has users that require SIGN_EXTEND and the SIGN_EXTEND can be done for free
-// by an instruction like ADDW/SUBW/MULW. Without this the ANY_EXTEND would be
-// removed during type legalization leaving an ADD/SUB/MUL use that won't use
-// ADDW/SUBW/MULW.
-static SDValue performANY_EXTENDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI,
- const RISCVSubtarget &Subtarget) {
- if (!Subtarget.is64Bit())
- return SDValue();
-
- SelectionDAG &DAG = DCI.DAG;
-
+static SDValue
+performSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
SDValue Src = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (VT != MVT::i64 || Src.getValueType() != MVT::i32)
- return SDValue();
- // The opcode must be one that can implicitly sign_extend.
- // FIXME: Additional opcodes.
- switch (Src.getOpcode()) {
- default:
- return SDValue();
- case ISD::MUL:
- if (!Subtarget.hasStdExtM())
- return SDValue();
- LLVM_FALLTHROUGH;
- case ISD::ADD:
- case ISD::SUB:
- break;
+ // Fold (sext_inreg (fmv_x_anyexth X), i16) -> (fmv_x_signexth X)
+ if (Src.getOpcode() == RISCVISD::FMV_X_ANYEXTH &&
+ cast<VTSDNode>(N->getOperand(1))->getVT().bitsGE(MVT::i16))
+ return DAG.getNode(RISCVISD::FMV_X_SIGNEXTH, SDLoc(N), VT,
+ Src.getOperand(0));
+
+ // Fold (i64 (sext_inreg (abs X), i32)) ->
+ // (i64 (smax (sext_inreg (neg X), i32), X)) if X has more than 32 sign bits.
+ // The (sext_inreg (neg X), i32) will be selected to negw by isel. This
+ // pattern occurs after type legalization of (i32 (abs X)) on RV64 if the user
+ // of the (i32 (abs X)) is a sext or setcc or something else that causes type
+ // legalization to add a sext_inreg after the abs. The (i32 (abs X)) will have
+ // been type legalized to (i64 (abs (sext_inreg X, i32))), but the sext_inreg
+ // may get combined into an earlier operation so we need to use
+ // ComputeNumSignBits.
+ // NOTE: (i64 (sext_inreg (abs X), i32)) can also be created for
+ // (i64 (ashr (shl (abs X), 32), 32)) without any type legalization so
+ // we can't assume that X has 33 sign bits. We must check.
+ if (Subtarget.hasStdExtZbb() && Subtarget.is64Bit() &&
+ Src.getOpcode() == ISD::ABS && Src.hasOneUse() && VT == MVT::i64 &&
+ cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32 &&
+ DAG.ComputeNumSignBits(Src.getOperand(0)) > 32) {
+ SDLoc DL(N);
+ SDValue Freeze = DAG.getFreeze(Src.getOperand(0));
+ SDValue Neg =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, MVT::i64), Freeze);
+ Neg = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Neg,
+ DAG.getValueType(MVT::i32));
+ return DAG.getNode(ISD::SMAX, DL, MVT::i64, Freeze, Neg);
}
- // Only handle cases where the result is used by a CopyToReg. That likely
- // means the value is a liveout of the basic block. This helps prevent
- // infinite combine loops like PR51206.
- if (none_of(N->uses(),
- [](SDNode *User) { return User->getOpcode() == ISD::CopyToReg; }))
- return SDValue();
+ return SDValue();
+}
- SmallVector<SDNode *, 4> SetCCs;
- for (SDNode::use_iterator UI = Src.getNode()->use_begin(),
- UE = Src.getNode()->use_end();
- UI != UE; ++UI) {
- SDNode *User = *UI;
- if (User == N)
- continue;
- if (UI.getUse().getResNo() != Src.getResNo())
- continue;
- // All i32 setccs are legalized by sign extending operands.
- if (User->getOpcode() == ISD::SETCC) {
- SetCCs.push_back(User);
- continue;
- }
- // We don't know if we can extend this user.
- break;
+// Try to form vwadd(u).wv/wx or vwsub(u).wv/wx. It might later be optimized to
+// vwadd(u).vv/vx or vwsub(u).vv/vx.
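+// Illustrative example (not from the original patch):
+//   (add_vl X:nxv2i64, (vsext_vl Y:nxv2i32)) -> (vwadd_w_vl X, Y)
+// keeping X at the wide type and Y at the narrow type.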
+static SDValue combineADDSUB_VLToVWADDSUB_VL(SDNode *N, SelectionDAG &DAG,
+ bool Commute = false) {
+ assert((N->getOpcode() == RISCVISD::ADD_VL ||
+ N->getOpcode() == RISCVISD::SUB_VL) &&
+ "Unexpected opcode");
+ bool IsAdd = N->getOpcode() == RISCVISD::ADD_VL;
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ if (Commute)
+ std::swap(Op0, Op1);
+
+ MVT VT = N->getSimpleValueType(0);
+
+ // Determine the narrow size for a widening add/sub.
+ unsigned NarrowSize = VT.getScalarSizeInBits() / 2;
+ MVT NarrowVT = MVT::getVectorVT(MVT::getIntegerVT(NarrowSize),
+ VT.getVectorElementCount());
+
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ SDLoc DL(N);
+
+ // If the RHS is a sext or zext, we can form a widening op.
+ if ((Op1.getOpcode() == RISCVISD::VZEXT_VL ||
+ Op1.getOpcode() == RISCVISD::VSEXT_VL) &&
+ Op1.hasOneUse() && Op1.getOperand(1) == Mask && Op1.getOperand(2) == VL) {
+ unsigned ExtOpc = Op1.getOpcode();
+ Op1 = Op1.getOperand(0);
+ // Re-introduce narrower extends if needed.
+ if (Op1.getValueType() != NarrowVT)
+ Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
+
+ unsigned WOpc;
+ if (ExtOpc == RISCVISD::VSEXT_VL)
+ WOpc = IsAdd ? RISCVISD::VWADD_W_VL : RISCVISD::VWSUB_W_VL;
+ else
+ WOpc = IsAdd ? RISCVISD::VWADDU_W_VL : RISCVISD::VWSUBU_W_VL;
+
+ return DAG.getNode(WOpc, DL, VT, Op0, Op1, Mask, VL);
}
- // If we don't have any SetCCs, this isn't worthwhile.
- if (SetCCs.empty())
- return SDValue();
+ // FIXME: Is it useful to form a vwadd.wx or vwsub.wx if it removes a scalar
+ // sext/zext?
+
+ return SDValue();
+}
+
+// Try to convert vwadd(u).wv/wx or vwsub(u).wv/wx to vwadd(u).vv/vx or
+// vwsub(u).vv/vx.
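+// Illustrative example (not from the original patch):
+//   (vwadd_w_vl (vsext_vl X), Y) -> (vwadd_vl X, Y)
+// where both operands are now the narrow type.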
+static SDValue combineVWADD_W_VL_VWSUB_W_VL(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ SDValue Mask = N->getOperand(2);
+ SDValue VL = N->getOperand(3);
+
+ MVT VT = N->getSimpleValueType(0);
+ MVT NarrowVT = Op1.getSimpleValueType();
+ unsigned NarrowSize = NarrowVT.getScalarSizeInBits();
+
+ unsigned VOpc;
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCVISD::VWADD_W_VL: VOpc = RISCVISD::VWADD_VL; break;
+ case RISCVISD::VWSUB_W_VL: VOpc = RISCVISD::VWSUB_VL; break;
+ case RISCVISD::VWADDU_W_VL: VOpc = RISCVISD::VWADDU_VL; break;
+ case RISCVISD::VWSUBU_W_VL: VOpc = RISCVISD::VWSUBU_VL; break;
+ }
+
+ bool IsSigned = N->getOpcode() == RISCVISD::VWADD_W_VL ||
+ N->getOpcode() == RISCVISD::VWSUB_W_VL;
SDLoc DL(N);
- SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Src);
- DCI.CombineTo(N, SExt);
- // Promote all the setccs.
- for (SDNode *SetCC : SetCCs) {
- SmallVector<SDValue, 4> Ops;
+ // If the LHS is a sext or zext, we can narrow this op to the same size as
+ // the RHS.
+ if (((Op0.getOpcode() == RISCVISD::VZEXT_VL && !IsSigned) ||
+ (Op0.getOpcode() == RISCVISD::VSEXT_VL && IsSigned)) &&
+ Op0.hasOneUse() && Op0.getOperand(1) == Mask && Op0.getOperand(2) == VL) {
+ unsigned ExtOpc = Op0.getOpcode();
+ Op0 = Op0.getOperand(0);
+ // Re-introduce narrower extends if needed.
+ if (Op0.getValueType() != NarrowVT)
+ Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
+ return DAG.getNode(VOpc, DL, VT, Op0, Op1, Mask, VL);
+ }
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Src)
- Ops.push_back(SExt);
- else
- Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, SOp));
+ bool IsAdd = N->getOpcode() == RISCVISD::VWADD_W_VL ||
+ N->getOpcode() == RISCVISD::VWADDU_W_VL;
+
+ // Look for splats on the left hand side of a vwadd(u).wv. We might be able
+ // to commute and use a vwadd(u).vx instead.
+ if (IsAdd && Op0.getOpcode() == RISCVISD::VMV_V_X_VL &&
+ Op0.getOperand(0).isUndef() && Op0.getOperand(2) == VL) {
+ Op0 = Op0.getOperand(1);
+
+ // See if we have enough sign bits or zero bits in the scalar to use a
+ // widening add/sub by splatting to a smaller element size.
+ unsigned EltBits = VT.getScalarSizeInBits();
+ unsigned ScalarBits = Op0.getValueSizeInBits();
+ // Make sure we're getting all element bits from the scalar register.
+ // FIXME: Support implicit sign extension of vmv.v.x?
+ if (ScalarBits < EltBits)
+ return SDValue();
+
+ if (IsSigned) {
+ if (DAG.ComputeNumSignBits(Op0) <= (ScalarBits - NarrowSize))
+ return SDValue();
+ } else {
+ APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
+ if (!DAG.MaskedValueIsZero(Op0, Mask))
+ return SDValue();
}
- Ops.push_back(SetCC->getOperand(2));
- DCI.CombineTo(SetCC,
- DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
+ Op0 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
+ DAG.getUNDEF(NarrowVT), Op0, VL);
+ return DAG.getNode(VOpc, DL, VT, Op1, Op0, Mask, VL);
}
- return SDValue(N, 0);
+
+ return SDValue();
}
// Try to form VWMUL, VWMULU or VWMULSU.
@@ -7408,12 +8305,15 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
} else if (Op1.getOpcode() == RISCVISD::VMV_V_X_VL) {
// The operand is a splat of a scalar.
+ // The passthru must be undef for the operation to be tail agnostic.
+ if (!Op1.getOperand(0).isUndef())
+ return SDValue();
// The VL must be the same.
- if (Op1.getOperand(1) != VL)
+ if (Op1.getOperand(2) != VL)
return SDValue();
// Get the scalar value.
- Op1 = Op1.getOperand(0);
+ Op1 = Op1.getOperand(1);
// See if we have enough sign bits or zero bits in the scalar to use a
// widening multiply by splatting to a smaller element size.
@@ -7424,16 +8324,20 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
if (ScalarBits < EltBits)
return SDValue();
- if (IsSignExt) {
- if (DAG.ComputeNumSignBits(Op1) <= (ScalarBits - NarrowSize))
- return SDValue();
+ // If the LHS is a sign extend, try to use vwmul.
+ if (IsSignExt && DAG.ComputeNumSignBits(Op1) > (ScalarBits - NarrowSize)) {
+ // Can use vwmul.
} else {
+ // Otherwise try to use vwmulu or vwmulsu.
APInt Mask = APInt::getBitsSetFrom(ScalarBits, NarrowSize);
- if (!DAG.MaskedValueIsZero(Op1, Mask))
+ if (DAG.MaskedValueIsZero(Op1, Mask))
+ IsVWMULSU = IsSignExt;
+ else
return SDValue();
}
- Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT, Op1, VL);
+ Op1 = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, NarrowVT,
+ DAG.getUNDEF(NarrowVT), Op1, VL);
} else
return SDValue();
@@ -7443,6 +8347,8 @@ static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
unsigned ExtOpc = IsSignExt ? RISCVISD::VSEXT_VL : RISCVISD::VZEXT_VL;
if (Op0.getValueType() != NarrowVT)
Op0 = DAG.getNode(ExtOpc, DL, NarrowVT, Op0, Mask, VL);
+ // vwmulsu requires the second operand to be zero extended.
+ ExtOpc = IsVWMULSU ? RISCVISD::VZEXT_VL : ExtOpc;
if (Op1.getValueType() != NarrowVT)
Op1 = DAG.getNode(ExtOpc, DL, NarrowVT, Op1, Mask, VL);
@@ -7569,6 +8475,133 @@ static SDValue performFP_TO_INT_SATCombine(SDNode *N,
return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
}
+// Combine (bitreverse (bswap X)) to the BREV8 GREVI encoding if the type is
+// smaller than XLenVT.
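+// Illustrative example (not from the original patch): for i16,
+// (bitreverse (bswap X)) reverses the bits within each byte, which is
+// exactly GREV with control 7 (brev8).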
+static SDValue performBITREVERSECombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(Subtarget.hasStdExtZbkb() && "Unexpected extension");
+
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() != ISD::BSWAP)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (!VT.isScalarInteger() || VT.getSizeInBits() >= Subtarget.getXLen() ||
+ !isPowerOf2_32(VT.getSizeInBits()))
+ return SDValue();
+
+ SDLoc DL(N);
+ return DAG.getNode(RISCVISD::GREV, DL, VT, Src.getOperand(0),
+ DAG.getConstant(7, DL, VT));
+}
+
+// Convert from one FMA opcode to another based on whether we are negating the
+// multiply result and/or the accumulator.
+// NOTE: Only supports RVV operations with VL.
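+// Illustrative example (not from the original patch): negating the multiply
+// result of VFMADD_VL (a * b + c) yields VFNMSUB_VL (-(a * b) + c), and
+// negating the accumulator as well yields VFNMADD_VL (-(a * b) - c).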
+static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc) {
+ assert((NegMul || NegAcc) && "Not negating anything?");
+
+ // Negating the multiply result changes ADD<->SUB and toggles 'N'.
+ if (NegMul) {
+ // clang-format off
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
+ case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
+ case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
+ case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
+ }
+ // clang-format on
+ }
+
+ // Negating the accumulator changes ADD<->SUB.
+ if (NegAcc) {
+ // clang-format off
+ switch (Opcode) {
+ default: llvm_unreachable("Unexpected opcode");
+ case RISCVISD::VFMADD_VL: Opcode = RISCVISD::VFMSUB_VL; break;
+ case RISCVISD::VFMSUB_VL: Opcode = RISCVISD::VFMADD_VL; break;
+ case RISCVISD::VFNMADD_VL: Opcode = RISCVISD::VFNMSUB_VL; break;
+ case RISCVISD::VFNMSUB_VL: Opcode = RISCVISD::VFNMADD_VL; break;
+ }
+ // clang-format on
+ }
+
+ return Opcode;
+}
+
+// Combine (sra (shl X, 32), 32 - C) -> (shl (sext_inreg X, i32), C)
+// FIXME: Should this be a generic combine? There's a similar combine on X86.
+//
+// Also try these folds where an add or sub is in the middle.
+// (sra (add (shl X, 32), C1), 32 - C) -> (shl (sext_inreg (add X, C1>>32), i32), C)
+// (sra (sub C1, (shl X, 32)), 32 - C) -> (shl (sext_inreg (sub C1>>32, X), i32), C)
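+// Illustrative example (not from the original patch): with C == 1,
+// (sra (shl X, 32), 31) becomes (shl (sext_inreg X, i32), 1).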
+static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(N->getOpcode() == ISD::SRA && "Unexpected opcode");
+
+ if (N->getValueType(0) != MVT::i64 || !Subtarget.is64Bit())
+ return SDValue();
+
+ auto *ShAmtC = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!ShAmtC || ShAmtC->getZExtValue() > 32)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+
+ SDValue Shl;
+ ConstantSDNode *AddC = nullptr;
+
+ // We might have an ADD or SUB between the SRA and SHL.
+ bool IsAdd = N0.getOpcode() == ISD::ADD;
+ if ((IsAdd || N0.getOpcode() == ISD::SUB)) {
+ if (!N0.hasOneUse())
+ return SDValue();
+ // Other operand needs to be a constant we can modify.
+ AddC = dyn_cast<ConstantSDNode>(N0.getOperand(IsAdd ? 1 : 0));
+ if (!AddC)
+ return SDValue();
+
+ // AddC needs to have at least 32 trailing zeros.
+ if (AddC->getAPIntValue().countTrailingZeros() < 32)
+ return SDValue();
+
+ Shl = N0.getOperand(IsAdd ? 0 : 1);
+ } else {
+ // Not an ADD or SUB.
+ Shl = N0;
+ }
+
+ // Look for a shift left by 32.
+ if (Shl.getOpcode() != ISD::SHL || !Shl.hasOneUse() ||
+ !isa<ConstantSDNode>(Shl.getOperand(1)) ||
+ Shl.getConstantOperandVal(1) != 32)
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue In = Shl.getOperand(0);
+
+ // If we looked through an ADD or SUB, we need to rebuild it with the shifted
+ // constant.
+ if (AddC) {
+ SDValue ShiftedAddC =
+ DAG.getConstant(AddC->getAPIntValue().lshr(32), DL, MVT::i64);
+ if (IsAdd)
+ In = DAG.getNode(ISD::ADD, DL, MVT::i64, In, ShiftedAddC);
+ else
+ In = DAG.getNode(ISD::SUB, DL, MVT::i64, ShiftedAddC, In);
+ }
+
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, In,
+ DAG.getValueType(MVT::i32));
+ if (ShAmtC->getZExtValue() == 32)
+ return SExt;
+
+ return DAG.getNode(
+ ISD::SHL, DL, MVT::i64, SExt,
+ DAG.getConstant(32 - ShAmtC->getZExtValue(), DL, MVT::i64));
+}
+
SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -7597,6 +8630,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
if (Op0->getOpcode() == RISCVISD::BuildPairF64)
return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));
+ if (Op0->isUndef()) {
+ SDValue Lo = DAG.getUNDEF(MVT::i32);
+ SDValue Hi = DAG.getUNDEF(MVT::i32);
+ return DCI.CombineTo(N, Lo, Hi);
+ }
+
SDLoc DL(N);
// It's cheaper to materialise two 32-bit integers than to load a double
@@ -7634,15 +8673,27 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
case RISCVISD::SLLW:
case RISCVISD::SRAW:
- case RISCVISD::SRLW:
- case RISCVISD::ROLW:
- case RISCVISD::RORW: {
+ case RISCVISD::SRLW: {
// Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
if (SimplifyDemandedLowBitsHelper(0, 32) ||
SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
+
break;
}
+ case ISD::ROTR:
+ case ISD::ROTL:
+ case RISCVISD::RORW:
+ case RISCVISD::ROLW: {
+ if (N->getOpcode() == RISCVISD::RORW || N->getOpcode() == RISCVISD::ROLW) {
+ // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
+ if (SimplifyDemandedLowBitsHelper(0, 32) ||
+ SimplifyDemandedLowBitsHelper(1, 5))
+ return SDValue(N, 0);
+ }
+
+ return combineROTR_ROTL_RORW_ROLW(N, DAG, Subtarget);
+ }
case RISCVISD::CLZW:
case RISCVISD::CTZW: {
// Only the lower 32 bits of the first operand are read
@@ -7667,7 +8718,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SimplifyDemandedLowBitsHelper(1, 5))
return SDValue(N, 0);
- return combineGREVI_GORCI(N, DAG);
+ break;
}
case RISCVISD::SHFL:
case RISCVISD::UNSHFL: {
@@ -7682,10 +8733,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case RISCVISD::SHFLW:
case RISCVISD::UNSHFLW: {
// Only the lower 32 bits of LHS and lower 4 bits of RHS are read.
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
- APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
if (SimplifyDemandedLowBitsHelper(0, 32) ||
SimplifyDemandedLowBitsHelper(1, 4))
return SDValue(N, 0);
@@ -7701,6 +8748,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case RISCVISD::FSR:
+ case RISCVISD::FSL:
+ case RISCVISD::FSRW:
+ case RISCVISD::FSLW: {
+ bool IsWInstruction =
+ N->getOpcode() == RISCVISD::FSRW || N->getOpcode() == RISCVISD::FSLW;
+ unsigned BitWidth =
+ IsWInstruction ? 32 : N->getSimpleValueType(0).getSizeInBits();
+ assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
+ // Only the lower log2(BitWidth)+1 bits of the shift amount are read.
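+ // Illustrative example (not from the original patch): FSR on i64 funnel
+ // shifts a 128-bit pair, so shift amounts up to 127 are meaningful and
+ // log2(64) + 1 == 7 low bits are demanded.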
+ if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth) + 1))
+ return SDValue(N, 0);
+
+ break;
+ }
case RISCVISD::FMV_X_ANYEXTH:
case RISCVISD::FMV_X_ANYEXTW_RV64: {
SDLoc DL(N);
@@ -7727,7 +8789,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
SDValue NewFMV = DAG.getNode(N->getOpcode(), DL, VT, Op0.getOperand(0));
unsigned FPBits = N->getOpcode() == RISCVISD::FMV_X_ANYEXTW_RV64 ? 32 : 16;
- APInt SignBit = APInt::getSignMask(FPBits).sextOrSelf(VT.getSizeInBits());
+ APInt SignBit = APInt::getSignMask(FPBits).sext(VT.getSizeInBits());
if (Op0.getOpcode() == ISD::FNEG)
return DAG.getNode(ISD::XOR, DL, VT, NewFMV,
DAG.getConstant(SignBit, DL, VT));
@@ -7741,13 +8803,21 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB:
return performSUBCombine(N, DAG);
case ISD::AND:
- return performANDCombine(N, DAG);
+ return performANDCombine(N, DAG, Subtarget);
case ISD::OR:
return performORCombine(N, DAG, Subtarget);
case ISD::XOR:
return performXORCombine(N, DAG);
- case ISD::ANY_EXTEND:
- return performANY_EXTENDCombine(N, DCI, Subtarget);
+ case ISD::FADD:
+ case ISD::UMAX:
+ case ISD::UMIN:
+ case ISD::SMAX:
+ case ISD::SMIN:
+ case ISD::FMAXNUM:
+ case ISD::FMINNUM:
+ return combineBinOpToReduce(N, DAG);
+ case ISD::SIGN_EXTEND_INREG:
+ return performSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
case ISD::ZERO_EXTEND:
// Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during
// type legalization. This is safe because fp_to_uint produces poison if
@@ -7879,6 +8949,8 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
}
break;
}
+ case ISD::BITREVERSE:
+ return performBITREVERSECombine(N, DAG, Subtarget);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return performFP_TO_INTCombine(N, DCI, Subtarget);
@@ -7952,40 +9024,41 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DL, IndexVT, Index);
}
- unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
- if (IsIndexScaled && Scale != 1) {
- // Manually scale the indices by the element size.
+ if (IsIndexScaled) {
+ // Manually scale the indices.
// TODO: Sanitize the scale operand here?
// TODO: For VP nodes, should we use VP_SHL here?
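+ // Illustrative example (not from the original patch): Scale == 4 shifts
+ // each index left by 2 and the scale operand is rewritten to 1.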
+ unsigned Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue();
assert(isPowerOf2_32(Scale) && "Expecting power-of-two types");
SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
+ ScaleOp = DAG.getTargetConstant(1, DL, ScaleOp.getValueType());
}
- ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
+ ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_SCALED;
if (const auto *VPGN = dyn_cast<VPGatherSDNode>(N))
return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
{VPGN->getChain(), VPGN->getBasePtr(), Index,
- VPGN->getScale(), VPGN->getMask(),
+ ScaleOp, VPGN->getMask(),
VPGN->getVectorLength()},
VPGN->getMemOperand(), NewIndexTy);
if (const auto *VPSN = dyn_cast<VPScatterSDNode>(N))
return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
{VPSN->getChain(), VPSN->getValue(),
- VPSN->getBasePtr(), Index, VPSN->getScale(),
+ VPSN->getBasePtr(), Index, ScaleOp,
VPSN->getMask(), VPSN->getVectorLength()},
VPSN->getMemOperand(), NewIndexTy);
if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N))
return DAG.getMaskedGather(
N->getVTList(), MGN->getMemoryVT(), DL,
{MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
- MGN->getBasePtr(), Index, MGN->getScale()},
+ MGN->getBasePtr(), Index, ScaleOp},
MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
const auto *MSN = cast<MaskedScatterSDNode>(N);
return DAG.getMaskedScatter(
N->getVTList(), MSN->getMemoryVT(), DL,
{MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
- Index, MSN->getScale()},
+ Index, ScaleOp},
MSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
}
case RISCVISD::SRA_VL:
@@ -7997,14 +9070,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
SDLoc DL(N);
SDValue VL = N->getOperand(3);
EVT VT = N->getValueType(0);
- ShAmt =
- DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, ShAmt.getOperand(0), VL);
+ ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
+ ShAmt.getOperand(1), VL);
return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt,
N->getOperand(2), N->getOperand(3));
}
break;
}
case ISD::SRA:
+ if (SDValue V = performSRACombine(N, DAG, Subtarget))
+ return V;
+ LLVM_FALLTHROUGH;
case ISD::SRL:
case ISD::SHL: {
SDValue ShAmt = N->getOperand(1);
@@ -8012,17 +9088,63 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// We don't need the upper 32 bits of a 64-bit element for a shift amount.
SDLoc DL(N);
EVT VT = N->getValueType(0);
- ShAmt =
- DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VT, ShAmt.getOperand(0));
+ ShAmt = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, DAG.getUNDEF(VT),
+ ShAmt.getOperand(1),
+ DAG.getRegister(RISCV::X0, Subtarget.getXLenVT()));
return DAG.getNode(N->getOpcode(), DL, VT, N->getOperand(0), ShAmt);
}
break;
}
+ case RISCVISD::ADD_VL:
+ if (SDValue V = combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ false))
+ return V;
+ return combineADDSUB_VLToVWADDSUB_VL(N, DAG, /*Commute*/ true);
+ case RISCVISD::SUB_VL:
+ return combineADDSUB_VLToVWADDSUB_VL(N, DAG);
+ case RISCVISD::VWADD_W_VL:
+ case RISCVISD::VWADDU_W_VL:
+ case RISCVISD::VWSUB_W_VL:
+ case RISCVISD::VWSUBU_W_VL:
+ return combineVWADD_W_VL_VWSUB_W_VL(N, DAG);
case RISCVISD::MUL_VL:
if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
return V;
// Mul is commutative.
return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
+ case RISCVISD::VFMADD_VL:
+ case RISCVISD::VFNMADD_VL:
+ case RISCVISD::VFMSUB_VL:
+ case RISCVISD::VFNMSUB_VL: {
+ // Fold FNEG_VL into FMA opcodes.
+ SDValue A = N->getOperand(0);
+ SDValue B = N->getOperand(1);
+ SDValue C = N->getOperand(2);
+ SDValue Mask = N->getOperand(3);
+ SDValue VL = N->getOperand(4);
+
+ auto invertIfNegative = [&Mask, &VL](SDValue &V) {
+ if (V.getOpcode() == RISCVISD::FNEG_VL && V.getOperand(1) == Mask &&
+ V.getOperand(2) == VL) {
+ // Return the negated input.
+ V = V.getOperand(0);
+ return true;
+ }
+
+ return false;
+ };
+
+ bool NegA = invertIfNegative(A);
+ bool NegB = invertIfNegative(B);
+ bool NegC = invertIfNegative(C);
+
+ // If no operands are negated, we're done.
+ if (!NegA && !NegB && !NegC)
+ return SDValue();
+
+ unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC);
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), A, B, C, Mask,
+ VL);
+ }
case ISD::STORE: {
auto *Store = cast<StoreSDNode>(N);
SDValue Val = Store->getValue();
@@ -8035,7 +9157,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
// The memory VT and the element type must match.
if (VecVT.getVectorElementType() == MemVT) {
SDLoc DL(N);
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
+ MVT MaskVT = getMaskTypeFor(VecVT);
return DAG.getStoreVP(
Store->getChain(), DL, Src, Store->getBasePtr(), Store->getOffset(),
DAG.getConstant(1, DL, MaskVT),
@@ -8047,6 +9169,73 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
+ case ISD::SPLAT_VECTOR: {
+ EVT VT = N->getValueType(0);
+ // Only perform this combine on legal MVT types.
+ if (!isTypeLegal(VT))
+ break;
+ if (auto Gather = matchSplatAsGather(N->getOperand(0), VT.getSimpleVT(), N,
+ DAG, Subtarget))
+ return Gather;
+ break;
+ }
+ case RISCVISD::VMV_V_X_VL: {
+ // Tail agnostic VMV.V.X only demands the vector element bitwidth from the
+ // scalar input.
+ unsigned ScalarSize = N->getOperand(1).getValueSizeInBits();
+ unsigned EltWidth = N->getValueType(0).getScalarSizeInBits();
+ if (ScalarSize > EltWidth && N->getOperand(0).isUndef())
+ if (SimplifyDemandedLowBitsHelper(1, EltWidth))
+ return SDValue(N, 0);
+
+ break;
+ }
+ case ISD::INTRINSIC_WO_CHAIN: {
+ unsigned IntNo = N->getConstantOperandVal(0);
+ switch (IntNo) {
+ // By default we do not combine any intrinsic.
+ default:
+ return SDValue();
+ case Intrinsic::riscv_vcpop:
+ case Intrinsic::riscv_vcpop_mask:
+ case Intrinsic::riscv_vfirst:
+ case Intrinsic::riscv_vfirst_mask: {
+ SDValue VL = N->getOperand(2);
+ if (IntNo == Intrinsic::riscv_vcpop_mask ||
+ IntNo == Intrinsic::riscv_vfirst_mask)
+ VL = N->getOperand(3);
+ if (!isNullConstant(VL))
+ return SDValue();
+ // If VL is 0, vcpop -> li 0, vfirst -> li -1.
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ if (IntNo == Intrinsic::riscv_vfirst ||
+ IntNo == Intrinsic::riscv_vfirst_mask)
+ return DAG.getConstant(-1, DL, VT);
+ return DAG.getConstant(0, DL, VT);
+ }
+ }
+ }
+ case ISD::BITCAST: {
+ assert(Subtarget.useRVVForFixedLengthVectors());
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
+ // If this is a bitcast between an MVT::v4i1/v2i1/v1i1 and an illegal integer
+ // type, widen both sides to avoid a trip through memory.
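+ // Illustrative example (not from the original patch): (i2 (bitcast
+ // X:v2i1)) concatenates X with three undef v2i1 to form v8i1, bitcasts
+ // that to i8, and truncates to i2.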
+ if ((SrcVT == MVT::v1i1 || SrcVT == MVT::v2i1 || SrcVT == MVT::v4i1) &&
+ VT.isScalarInteger()) {
+ unsigned NumConcats = 8 / SrcVT.getVectorNumElements();
+ SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT));
+ Ops[0] = N0;
+ SDLoc DL(N);
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i1, Ops);
+ N0 = DAG.getBitcast(MVT::i8, N0);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
+ }
+
+ return SDValue();
+ }
}
return SDValue();
@@ -8182,22 +9371,23 @@ bool RISCVTargetLowering::targetShrinkDemandedConstant(
return UseMask(NewMask);
}
-static void computeGREV(APInt &Src, unsigned ShAmt) {
- ShAmt &= Src.getBitWidth() - 1;
- uint64_t x = Src.getZExtValue();
- if (ShAmt & 1)
- x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
- if (ShAmt & 2)
- x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
- if (ShAmt & 4)
- x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
- if (ShAmt & 8)
- x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
- if (ShAmt & 16)
- x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
- if (ShAmt & 32)
- x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
- Src = x;
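+// Illustrative example (not from the original patch): ShAmt == 8 selects
+// stage 3, swapping adjacent bytes; with IsGORC the original value is
+// OR'd back in so bits propagate rather than move.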
+static uint64_t computeGREVOrGORC(uint64_t x, unsigned ShAmt, bool IsGORC) {
+ static const uint64_t GREVMasks[] = {
+ 0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
+ 0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};
+
+ for (unsigned Stage = 0; Stage != 6; ++Stage) {
+ unsigned Shift = 1 << Stage;
+ if (ShAmt & Shift) {
+ uint64_t Mask = GREVMasks[Stage];
+ uint64_t Res = ((x & Mask) << Shift) | ((x >> Shift) & Mask);
+ if (IsGORC)
+ Res |= x;
+ x = Res;
+ }
+ }
+
+ return x;
}
void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
@@ -8263,28 +9453,28 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
break;
}
case RISCVISD::GREV:
- case RISCVISD::GREVW: {
+ case RISCVISD::GORC: {
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
- if (Opc == RISCVISD::GREVW)
- Known = Known.trunc(32);
- unsigned ShAmt = C->getZExtValue();
- computeGREV(Known.Zero, ShAmt);
- computeGREV(Known.One, ShAmt);
- if (Opc == RISCVISD::GREVW)
- Known = Known.sext(BitWidth);
+ unsigned ShAmt = C->getZExtValue() & (Known.getBitWidth() - 1);
+ bool IsGORC = Op.getOpcode() == RISCVISD::GORC;
+ // To compute the known zeros, invert the value before and after the transform.
+ Known.Zero =
+ ~computeGREVOrGORC(~Known.Zero.getZExtValue(), ShAmt, IsGORC);
+ Known.One = computeGREVOrGORC(Known.One.getZExtValue(), ShAmt, IsGORC);
}
break;
}
case RISCVISD::READ_VLENB: {
- // If we know the minimum VLen from Zvl extensions, we can use that to
- // determine the trailing zeros of VLENB.
- // FIXME: Limit to 128 bit vectors until we have more testing.
- unsigned MinVLenB = std::min(128U, Subtarget.getMinVLen()) / 8;
- if (MinVLenB > 0)
- Known.Zero.setLowBits(Log2_32(MinVLenB));
- // We assume VLENB is no more than 65536 / 8 bytes.
- Known.Zero.setBitsFrom(14);
+ // We can use the minimum and maximum VLEN values to bound VLENB. We
+ // know VLEN must be a power of two.
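+ // Illustrative example (not from the original patch): if the subtarget
+ // pins VLEN to 128, MinVLenB == MaxVLenB == 16 and VLENB is known to be
+ // exactly 16.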
+ const unsigned MinVLenB = Subtarget.getRealMinVLen() / 8;
+ const unsigned MaxVLenB = Subtarget.getRealMaxVLen() / 8;
+ assert(MinVLenB > 0 && "READ_VLENB without vector extension enabled?");
+ Known.Zero.setLowBits(Log2_32(MinVLenB));
+ Known.Zero.setBitsFrom(Log2_32(MaxVLenB)+1);
+ if (MaxVLenB == MinVLenB)
+ Known.One.setBit(Log2_32(MinVLenB));
break;
}
case ISD::INTRINSIC_W_CHAIN:
@@ -8381,6 +9571,51 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
return 1;
}
+const Constant *
+RISCVTargetLowering::getTargetConstantFromLoad(LoadSDNode *Ld) const {
+ assert(Ld && "Unexpected null LoadSDNode");
+ if (!ISD::isNormalLoad(Ld))
+ return nullptr;
+
+ SDValue Ptr = Ld->getBasePtr();
+
+ // Only constant pools with no offset are supported.
+ auto GetSupportedConstantPool = [](SDValue Ptr) -> ConstantPoolSDNode * {
+ auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!CNode || CNode->isMachineConstantPoolEntry() ||
+ CNode->getOffset() != 0)
+ return nullptr;
+
+ return CNode;
+ };
+
+ // Simple case, LLA.
+ if (Ptr.getOpcode() == RISCVISD::LLA) {
+ auto *CNode = GetSupportedConstantPool(Ptr);
+ if (!CNode || CNode->getTargetFlags() != 0)
+ return nullptr;
+
+ return CNode->getConstVal();
+ }
+
+ // Look for a HI and ADD_LO pair.
+ if (Ptr.getOpcode() != RISCVISD::ADD_LO ||
+ Ptr.getOperand(0).getOpcode() != RISCVISD::HI)
+ return nullptr;
+
+ auto *CNodeLo = GetSupportedConstantPool(Ptr.getOperand(1));
+ auto *CNodeHi = GetSupportedConstantPool(Ptr.getOperand(0).getOperand(0));
+
+ if (!CNodeLo || CNodeLo->getTargetFlags() != RISCVII::MO_LO ||
+ !CNodeHi || CNodeHi->getTargetFlags() != RISCVII::MO_HI)
+ return nullptr;
+
+ if (CNodeLo->getConstVal() != CNodeHi->getConstVal())
+ return nullptr;
+
+ return CNodeLo->getConstVal();
+}
+
static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
MachineBasicBlock *BB) {
assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
@@ -8559,6 +9794,109 @@ static MachineBasicBlock *emitQuietFCMP(MachineInstr &MI, MachineBasicBlock *BB,
return BB;
}
+static MachineBasicBlock *
+EmitLoweredCascadedSelect(MachineInstr &First, MachineInstr &Second,
+ MachineBasicBlock *ThisMBB,
+ const RISCVSubtarget &Subtarget) {
+ // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
+ // Without this, custom-inserter would have generated:
+ //
+ // A
+ // | \
+ // | B
+ // | /
+ // C
+ // | \
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // B: empty
+ // C: Z = PHI [X, A], [Y, B]
+ // D: empty
+ // E: PHI [X, C], [Z, D]
+ //
+ // If we lower both Select_FPRX_ in a single step, we can instead generate:
+ //
+ // A
+ // | \
+ // | C
+ // | /|
+ // |/ |
+ // | |
+ // | D
+ // | /
+ // E
+ //
+ // A: X = ...; Y = ...
+ // D: empty
+ // E: PHI [X, A], [X, C], [Y, D]
+
+ const RISCVInstrInfo &TII = *Subtarget.getInstrInfo();
+ const DebugLoc &DL = First.getDebugLoc();
+ const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
+ MachineFunction *F = ThisMBB->getParent();
+ MachineBasicBlock *FirstMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SecondMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineFunction::iterator It = ++ThisMBB->getIterator();
+ F->insert(It, FirstMBB);
+ F->insert(It, SecondMBB);
+ F->insert(It, SinkMBB);
+
+ // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
+ SinkMBB->splice(SinkMBB->begin(), ThisMBB,
+ std::next(MachineBasicBlock::iterator(First)),
+ ThisMBB->end());
+ SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);
+
+ // Fallthrough block for ThisMBB.
+ ThisMBB->addSuccessor(FirstMBB);
+ // Fallthrough block for FirstMBB.
+ FirstMBB->addSuccessor(SecondMBB);
+ ThisMBB->addSuccessor(SinkMBB);
+ FirstMBB->addSuccessor(SinkMBB);
+ // SecondMBB falls through to SinkMBB.
+ SecondMBB->addSuccessor(SinkMBB);
+
+ auto FirstCC = static_cast<RISCVCC::CondCode>(First.getOperand(3).getImm());
+ Register FLHS = First.getOperand(1).getReg();
+ Register FRHS = First.getOperand(2).getReg();
+ // Insert appropriate branch.
+ BuildMI(ThisMBB, DL, TII.getBrCond(FirstCC))
+ .addReg(FLHS)
+ .addReg(FRHS)
+ .addMBB(SinkMBB);
+
+ Register SLHS = Second.getOperand(1).getReg();
+ Register SRHS = Second.getOperand(2).getReg();
+ Register Op1Reg4 = First.getOperand(4).getReg();
+ Register Op1Reg5 = First.getOperand(5).getReg();
+
+ auto SecondCC = static_cast<RISCVCC::CondCode>(Second.getOperand(3).getImm());
+ // Insert appropriate branch.
+ BuildMI(FirstMBB, DL, TII.getBrCond(SecondCC))
+ .addReg(SLHS)
+ .addReg(SRHS)
+ .addMBB(SinkMBB);
+
+ Register DestReg = Second.getOperand(0).getReg();
+ Register Op2Reg4 = Second.getOperand(4).getReg();
+ BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII.get(RISCV::PHI), DestReg)
+ .addReg(Op1Reg4)
+ .addMBB(ThisMBB)
+ .addReg(Op2Reg4)
+ .addMBB(FirstMBB)
+ .addReg(Op1Reg5)
+ .addMBB(SecondMBB);
+
+ // Now remove the Select_FPRX_s.
+ First.eraseFromParent();
+ Second.eraseFromParent();
+ return SinkMBB;
+}
+
static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
MachineBasicBlock *BB,
const RISCVSubtarget &Subtarget) {
@@ -8586,6 +9924,10 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
// previous selects in the sequence.
// These conditions could be further relaxed. See the X86 target for a
// related approach and more information.
+ //
+ // Select_FPRX_ (rs1, rs2, imm, rs4, (Select_FPRX_ rs1, rs2, imm, rs4, rs5))
+ // is checked here and handled by a separate function -
+ // EmitLoweredCascadedSelect.
Register LHS = MI.getOperand(1).getReg();
Register RHS = MI.getOperand(2).getReg();
auto CC = static_cast<RISCVCC::CondCode>(MI.getOperand(3).getImm());
@@ -8595,12 +9937,19 @@ static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
SelectDests.insert(MI.getOperand(0).getReg());
MachineInstr *LastSelectPseudo = &MI;
+ auto Next = next_nodbg(MI.getIterator(), BB->instr_end());
+ if (MI.getOpcode() != RISCV::Select_GPR_Using_CC_GPR && Next != BB->end() &&
+ Next->getOpcode() == MI.getOpcode() &&
+ Next->getOperand(5).getReg() == MI.getOperand(0).getReg() &&
+ Next->getOperand(5).isKill()) {
+ return EmitLoweredCascadedSelect(MI, *Next, BB, Subtarget);
+ }
for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
SequenceMBBI != E; ++SequenceMBBI) {
if (SequenceMBBI->isDebugInstr())
continue;
- else if (isSelectPseudo(*SequenceMBBI)) {
+ if (isSelectPseudo(*SequenceMBBI)) {
if (SequenceMBBI->getOperand(1).getReg() != LHS ||
SequenceMBBI->getOperand(2).getReg() != RHS ||
SequenceMBBI->getOperand(3).getImm() != CC ||
@@ -8831,7 +10180,7 @@ static unsigned allocateRVVReg(MVT ValVT, unsigned ValNo,
// Assign the first mask argument to V0.
// This is an interim calling convention and it may be changed in the
// future.
- if (FirstMaskArgument.hasValue() && ValNo == FirstMaskArgument.getValue())
+ if (FirstMaskArgument && ValNo == *FirstMaskArgument)
return State.AllocateReg(RISCV::V0);
return State.AllocateReg(ArgVRs);
}
@@ -10112,6 +11461,13 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BuildPairF64)
NODE_NAME_CASE(SplitF64)
NODE_NAME_CASE(TAIL)
+ NODE_NAME_CASE(ADD_LO)
+ NODE_NAME_CASE(HI)
+ NODE_NAME_CASE(LLA)
+ NODE_NAME_CASE(ADD_TPREL)
+ NODE_NAME_CASE(LA)
+ NODE_NAME_CASE(LA_TLS_IE)
+ NODE_NAME_CASE(LA_TLS_GD)
NODE_NAME_CASE(MULHSU)
NODE_NAME_CASE(SLLW)
NODE_NAME_CASE(SRAW)
@@ -10129,6 +11485,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FSR)
NODE_NAME_CASE(FMV_H_X)
NODE_NAME_CASE(FMV_X_ANYEXTH)
+ NODE_NAME_CASE(FMV_X_SIGNEXTH)
NODE_NAME_CASE(FMV_W_X_RV64)
NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
NODE_NAME_CASE(FCVT_X)
@@ -10157,7 +11514,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VMV_X_S)
NODE_NAME_CASE(VMV_S_X_VL)
NODE_NAME_CASE(VFMV_S_F_VL)
- NODE_NAME_CASE(SPLAT_VECTOR_I64)
NODE_NAME_CASE(SPLAT_VECTOR_SPLIT_I64_VL)
NODE_NAME_CASE(READ_VLENB)
NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
@@ -10203,7 +11559,10 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FNEG_VL)
NODE_NAME_CASE(FABS_VL)
NODE_NAME_CASE(FSQRT_VL)
- NODE_NAME_CASE(FMA_VL)
+ NODE_NAME_CASE(VFMADD_VL)
+ NODE_NAME_CASE(VFNMADD_VL)
+ NODE_NAME_CASE(VFMSUB_VL)
+ NODE_NAME_CASE(VFNMSUB_VL)
NODE_NAME_CASE(FCOPYSIGN_VL)
NODE_NAME_CASE(SMIN_VL)
NODE_NAME_CASE(SMAX_VL)
@@ -10222,7 +11581,14 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VWMUL_VL)
NODE_NAME_CASE(VWMULU_VL)
NODE_NAME_CASE(VWMULSU_VL)
+ NODE_NAME_CASE(VWADD_VL)
NODE_NAME_CASE(VWADDU_VL)
+ NODE_NAME_CASE(VWSUB_VL)
+ NODE_NAME_CASE(VWSUBU_VL)
+ NODE_NAME_CASE(VWADD_W_VL)
+ NODE_NAME_CASE(VWADDU_W_VL)
+ NODE_NAME_CASE(VWSUB_W_VL)
+ NODE_NAME_CASE(VWSUBU_W_VL)
NODE_NAME_CASE(SETCC_VL)
NODE_NAME_CASE(VSELECT_VL)
NODE_NAME_CASE(VP_MERGE_VL)
@@ -10237,8 +11603,6 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VSEXT_VL)
NODE_NAME_CASE(VZEXT_VL)
NODE_NAME_CASE(VCPOP_VL)
- NODE_NAME_CASE(VLE_VL)
- NODE_NAME_CASE(VSE_VL)
NODE_NAME_CASE(READ_CSR)
NODE_NAME_CASE(WRITE_CSR)
NODE_NAME_CASE(SWAP_CSR)
@@ -10459,7 +11823,18 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
}
- return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+ std::pair<Register, const TargetRegisterClass *> Res =
+ TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
+
+ // If we picked one of the Zfinx register classes, remap it to the GPR class.
+ // FIXME: When Zfinx is supported in CodeGen this will need to take the
+ // Subtarget into account.
+ if (Res.second == &RISCV::GPRF16RegClass ||
+ Res.second == &RISCV::GPRF32RegClass ||
+ Res.second == &RISCV::GPRF64RegClass)
+ return std::make_pair(Res.first, &RISCV::GPRRegClass);
+
+ return Res;
}
unsigned
@@ -10681,7 +12056,8 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
return Result;
}
-bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
+bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
+ EVT DataVT) const {
return false;
}
@@ -10797,7 +12173,7 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
(1 - ImmS).isPowerOf2())
- return true;
+ return true;
}
}
}
@@ -10805,8 +12181,8 @@ bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
return false;
}
-bool RISCVTargetLowering::isMulAddWithConstProfitable(
- const SDValue &AddNode, const SDValue &ConstNode) const {
+bool RISCVTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
+ SDValue ConstNode) const {
// Let the DAGCombiner decide for vectors.
EVT VT = AddNode.getValueType();
if (VT.isVector())
@@ -10831,9 +12207,13 @@ bool RISCVTargetLowering::isMulAddWithConstProfitable(
bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
bool *Fast) const {
- if (!VT.isVector())
- return false;
+ if (!VT.isVector()) {
+ if (Fast)
+ *Fast = false;
+ return Subtarget.enableUnalignedScalarMem();
+ }
+ // All vector implementations must support element alignment
EVT ElemVT = VT.getVectorElementType();
if (Alignment >= ElemVT.getStoreSize()) {
if (Fast)
@@ -10847,7 +12227,7 @@ bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
bool RISCVTargetLowering::splitValueIntoRegisterParts(
SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.hasValue();
+ bool IsABIRegCopy = CC.has_value();
EVT ValueVT = Val.getValueType();
if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
// Cast the f16 to i16, extend to i32, pad with ones to make a float nan,
@@ -10901,7 +12281,7 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts(
SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
- bool IsABIRegCopy = CC.hasValue();
+ bool IsABIRegCopy = CC.has_value();
if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
SDValue Val = Parts[0];