| author | Dimitry Andric <dim@FreeBSD.org> | 2021-12-25 22:30:44 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2021-12-25 22:30:44 +0000 |
| commit | 77fc4c146f0870ffb09c1afb823ccbe742c5e6ff | |
| tree | 5c0eb39553003b9c75a901af6bc4ddabd6f2f28c /llvm/lib/Target/RISCV | |
| parent | f65dcba83ce5035ab88a85fe17628b447eb56e1b | |
Diffstat (limited to 'llvm/lib/Target/RISCV')
27 files changed, 1688 insertions, 1270 deletions
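Reviewer note: the first hunk below (`parseInsnDirectiveOpcode`) teaches the assembler to accept the canonical opcode names from Table 19.1 in `.insn` directives, backed by a new TableGen-generated name/value table, so something like `.insn r OP, 0, 0, a0, a1, a2` should be equivalent to `.insn r 0x33, 0, 0, a0, a1, a2`. A minimal standalone sketch of the parsing pattern, with an assumed subset of the generated `RISCVOpcodesList` (names and helper are illustrative, not the patch's code):

```cpp
#include <cstdint>
#include <cstdlib>
#include <map>
#include <optional>
#include <string>

// An ".insn" opcode operand is either a known opcode name or an integer
// that fits in 7 bits; anything else is a parse error. The table here is
// a small assumed subset of the TableGen-generated opcode list.
std::optional<unsigned> parseInsnOpcode(const std::string &Tok) {
  static const std::map<std::string, unsigned> Names = {
      {"LOAD", 0b0000011}, {"OP_IMM", 0b0010011},
      {"OP", 0b0110011},   {"SYSTEM", 0b1110011}};
  if (auto It = Names.find(Tok); It != Names.end())
    return It->second;                 // known opcode name
  char *End = nullptr;
  long Val = std::strtol(Tok.c_str(), &End, 0);
  if (End != Tok.c_str() && *End == '\0' && Val >= 0 && Val < (1 << 7))
    return static_cast<unsigned>(Val); // integer in [0, 127]
  return std::nullopt; // "valid opcode name or integer in the range [0, 127]"
}
```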
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index f00813f1301a..75592dd4c6f5 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -169,6 +169,7 @@ class RISCVAsmParser : public MCTargetAsmParser { OperandMatchResultTy parseJALOffset(OperandVector &Operands); OperandMatchResultTy parseVTypeI(OperandVector &Operands); OperandMatchResultTy parseMaskReg(OperandVector &Operands); + OperandMatchResultTy parseInsnDirectiveOpcode(OperandVector &Operands); bool parseOperand(OperandVector &Operands, StringRef Mnemonic); @@ -827,6 +828,7 @@ public: Op->SysReg.Length = Str.size(); Op->SysReg.Encoding = Encoding; Op->StartLoc = S; + Op->EndLoc = S; Op->IsRV64 = IsRV64; return Op; } @@ -836,6 +838,7 @@ public: auto Op = std::make_unique<RISCVOperand>(KindTy::VType); Op->VType.Val = VTypeI; Op->StartLoc = S; + Op->EndLoc = S; Op->IsRV64 = IsRV64; return Op; } @@ -1291,7 +1294,7 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands, if (HadParens) Operands.push_back(RISCVOperand::createToken("(", FirstS, isRV64())); SMLoc S = getLoc(); - SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size()); getLexer().Lex(); Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64())); } @@ -1305,6 +1308,67 @@ OperandMatchResultTy RISCVAsmParser::parseRegister(OperandVector &Operands, } OperandMatchResultTy +RISCVAsmParser::parseInsnDirectiveOpcode(OperandVector &Operands) { + SMLoc S = getLoc(); + SMLoc E; + const MCExpr *Res; + + switch (getLexer().getKind()) { + default: + return MatchOperand_NoMatch; + case AsmToken::LParen: + case AsmToken::Minus: + case AsmToken::Plus: + case AsmToken::Exclaim: + case AsmToken::Tilde: + case AsmToken::Integer: + case AsmToken::String: { + if (getParser().parseExpression(Res, E)) + return MatchOperand_ParseFail; + + auto *CE = dyn_cast<MCConstantExpr>(Res); + if (CE) { + int64_t Imm = CE->getValue(); + if (isUInt<7>(Imm)) { + Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); + return MatchOperand_Success; + } + } + + Twine Msg = "immediate must be an integer in the range"; + Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]"); + return MatchOperand_ParseFail; + } + case AsmToken::Identifier: { + StringRef Identifier; + if (getParser().parseIdentifier(Identifier)) + return MatchOperand_ParseFail; + + auto Opcode = RISCVInsnOpcode::lookupRISCVOpcodeByName(Identifier); + if (Opcode) { + Res = MCConstantExpr::create(Opcode->Value, getContext()); + E = SMLoc::getFromPointer(S.getPointer() + Identifier.size()); + Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); + return MatchOperand_Success; + } + + Twine Msg = "operand must be a valid opcode name or an " + "integer in the range"; + Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]"); + return MatchOperand_ParseFail; + } + case AsmToken::Percent: { + // Discard operand with modifier. 
+ Twine Msg = "immediate must be an integer in the range"; + Error(S, Msg + " [" + Twine(0) + ", " + Twine((1 << 7) - 1) + "]"); + return MatchOperand_ParseFail; + } + } + + return MatchOperand_NoMatch; +} + +OperandMatchResultTy RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) { SMLoc S = getLoc(); const MCExpr *Res; @@ -1381,7 +1445,7 @@ RISCVAsmParser::parseCSRSystemRegister(OperandVector &Operands) { OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) { SMLoc S = getLoc(); - SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + SMLoc E; const MCExpr *Res; switch (getLexer().getKind()) { @@ -1396,7 +1460,7 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) { case AsmToken::Integer: case AsmToken::String: case AsmToken::Identifier: - if (getParser().parseExpression(Res)) + if (getParser().parseExpression(Res, E)) return MatchOperand_ParseFail; break; case AsmToken::Percent: @@ -1410,7 +1474,7 @@ OperandMatchResultTy RISCVAsmParser::parseImmediate(OperandVector &Operands) { OperandMatchResultTy RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) { SMLoc S = getLoc(); - SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + SMLoc E; if (getLexer().getKind() != AsmToken::Percent) { Error(getLoc(), "expected '%' for operand modifier"); @@ -1449,7 +1513,6 @@ RISCVAsmParser::parseOperandWithModifier(OperandVector &Operands) { OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { SMLoc S = getLoc(); - SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); const MCExpr *Res; if (getLexer().getKind() != AsmToken::Identifier) @@ -1461,6 +1524,8 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { if (getParser().parseIdentifier(Identifier)) return MatchOperand_ParseFail; + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size()); + if (Identifier.consume_back("@plt")) { Error(getLoc(), "'@plt' operand not valid for instruction"); return MatchOperand_ParseFail; @@ -1492,7 +1557,7 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { } const MCExpr *Expr; - if (getParser().parseExpression(Expr)) + if (getParser().parseExpression(Expr, E)) return MatchOperand_ParseFail; Res = MCBinaryExpr::create(Opcode, Res, Expr, getContext()); Operands.push_back(RISCVOperand::createImm(Res, S, E, isRV64())); @@ -1501,7 +1566,6 @@ OperandMatchResultTy RISCVAsmParser::parseBareSymbol(OperandVector &Operands) { OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) { SMLoc S = getLoc(); - SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); const MCExpr *Res; if (getLexer().getKind() != AsmToken::Identifier) @@ -1515,6 +1579,8 @@ OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) { if (getParser().parseIdentifier(Identifier)) return MatchOperand_ParseFail; + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Identifier.size()); + RISCVMCExpr::VariantKind Kind = RISCVMCExpr::VK_RISCV_CALL; if (Identifier.consume_back("@plt")) Kind = RISCVMCExpr::VK_RISCV_CALL_PLT; @@ -1529,10 +1595,10 @@ OperandMatchResultTy RISCVAsmParser::parseCallSymbol(OperandVector &Operands) { OperandMatchResultTy RISCVAsmParser::parsePseudoJumpSymbol(OperandVector &Operands) { SMLoc S = getLoc(); - SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + SMLoc E; const MCExpr *Res; - if (getParser().parseExpression(Res)) + if (getParser().parseExpression(Res, E)) return MatchOperand_ParseFail; if (Res->getKind() != 
MCExpr::ExprKind::SymbolRef || @@ -1662,7 +1728,7 @@ OperandMatchResultTy RISCVAsmParser::parseMaskReg(OperandVector &Operands) { if (RegNo != RISCV::V0) return MatchOperand_NoMatch; SMLoc S = getLoc(); - SMLoc E = SMLoc::getFromPointer(S.getPointer() - 1); + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size()); getLexer().Lex(); Operands.push_back(RISCVOperand::createReg(RegNo, S, E, isRV64())); } @@ -2062,7 +2128,11 @@ bool RISCVAsmParser::parseDirectiveAttribute() { "unexpected token in '.attribute' directive")) return true; - if (Tag == RISCVAttrs::ARCH) { + if (IsIntegerValue) + getTargetStreamer().emitAttribute(Tag, IntegerValue); + else if (Tag != RISCVAttrs::ARCH) + getTargetStreamer().emitTextAttribute(Tag, StringValue); + else { StringRef Arch = StringValue; for (auto Feature : RISCVFeatureKV) if (llvm::RISCVISAInfo::isSupportedExtensionFeature(Feature.Key)) @@ -2070,7 +2140,7 @@ bool RISCVAsmParser::parseDirectiveAttribute() { auto ParseResult = llvm::RISCVISAInfo::parseArchString( StringValue, /*EnableExperimentalExtension=*/true, - /*ExperimentalExtensionVersionCheck=*/false); + /*ExperimentalExtensionVersionCheck=*/true); if (!ParseResult) { std::string Buffer; raw_string_ostream OutputErrMsg(Buffer); @@ -2093,35 +2163,9 @@ bool RISCVAsmParser::parseDirectiveAttribute() { setFeatureBits(RISCV::Feature64Bit, "64bit"); else return Error(ValueExprLoc, "bad arch string " + Arch); - } - if (IsIntegerValue) - getTargetStreamer().emitAttribute(Tag, IntegerValue); - else { - if (Tag != RISCVAttrs::ARCH) { - getTargetStreamer().emitTextAttribute(Tag, StringValue); - } else { - std::vector<std::string> FeatureVector; - RISCVFeatures::toFeatureVector(FeatureVector, getSTI().getFeatureBits()); - - // Parse that by RISCVISAInfo-> - unsigned XLen = getFeatureBits(RISCV::Feature64Bit) ? 64 : 32; - auto ParseResult = llvm::RISCVISAInfo::parseFeatures(XLen, FeatureVector); - if (!ParseResult) { - std::string Buffer; - raw_string_ostream OutputErrMsg(Buffer); - handleAllErrors(ParseResult.takeError(), - [&](llvm::StringError &ErrMsg) { - OutputErrMsg << ErrMsg.getMessage(); - }); - - return Error(ValueExprLoc, OutputErrMsg.str()); - } - auto &ISAInfo = *ParseResult; - - // Then emit the arch string. - getTargetStreamer().emitTextAttribute(Tag, ISAInfo->toString()); - } + // Then emit the arch string. 
+ getTargetStreamer().emitTextAttribute(Tag, ISAInfo->toString()); } return false; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp index 0aba18b20f0d..144e761f002d 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp @@ -27,6 +27,11 @@ namespace RISCVSysReg { #include "RISCVGenSearchableTables.inc" } // namespace RISCVSysReg +namespace RISCVInsnOpcode { +#define GET_RISCVOpcodesList_IMPL +#include "RISCVGenSearchableTables.inc" +} // namespace RISCVInsnOpcode + namespace RISCVABI { ABI computeTargetABI(const Triple &TT, FeatureBitset FeatureBits, StringRef ABIName) { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index d8f4403c824f..9cfd36745f46 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -299,6 +299,16 @@ struct SysReg { #include "RISCVGenSearchableTables.inc" } // end namespace RISCVSysReg +namespace RISCVInsnOpcode { +struct RISCVOpcode { + const char *Name; + unsigned Value; +}; + +#define GET_RISCVOpcodesList_DECL +#include "RISCVGenSearchableTables.inc" +} // end namespace RISCVInsnOpcode + namespace RISCVABI { enum ABI { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index f1c3810f4ee5..89a7d54f60f8 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -171,9 +171,9 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); // Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx, - // or non-zero bits 8/9/10. + // or non-zero in bits 8 and above. if (RISCVVType::getVLMUL(Imm) == RISCVII::VLMUL::LMUL_RESERVED || - RISCVVType::getSEW(Imm) > 64 || (Imm & 0x700) != 0) { + RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) { O << Imm; return; } diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 772a4f8ecd53..6aa915c01929 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -168,14 +168,6 @@ def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">, AssemblerPredicate<(all_of FeatureStdExtZvlsseg), "'Zvlsseg' (Vector segment load/store instructions)">; -def FeatureStdExtZvamo - : SubtargetFeature<"experimental-zvamo", "HasStdExtZvamo", "true", - "'Zvamo' (Vector AMO Operations)", - [FeatureStdExtV]>; -def HasStdExtZvamo : Predicate<"Subtarget->hasStdExtZvamo()">, - AssemblerPredicate<(all_of FeatureStdExtZvamo), - "'Zvamo' (Vector AMO Operations)">; - def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true", "Implements RV64">; def IsRV64 : Predicate<"Subtarget->is64Bit()">, diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 66a34d73dd37..b24eb5f7bbf4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -718,6 +718,71 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; } + case ISD::MUL: { + // Special case for calculating (mul (and X, C2), C1) where the full product + // fits in XLen bits. We can shift X left by the number of leading zeros in + // C2 and shift C1 left by XLen-lzcnt(C2). 
This will ensure the final + // product has XLen trailing zeros, putting it in the output of MULHU. This + // can avoid materializing a constant in a register for C2. + + // RHS should be a constant. + auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); + if (!N1C || !N1C->hasOneUse()) + break; + + // LHS should be an AND with constant. + SDValue N0 = Node->getOperand(0); + if (N0.getOpcode() != ISD::AND || !isa<ConstantSDNode>(N0.getOperand(1))) + break; + + uint64_t C2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); + + // Constant should be a mask. + if (!isMask_64(C2)) + break; + + // This should be the only use of the AND unless we will use + // (SRLI (SLLI X, 32), 32). We don't use a shift pair for other AND + // constants. + if (!N0.hasOneUse() && C2 != UINT64_C(0xFFFFFFFF)) + break; + + // If this can be an ANDI, ZEXT.H or ZEXT.W we don't need to do this + // optimization. + if (isInt<12>(C2) || + (C2 == UINT64_C(0xFFFF) && + (Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp())) || + (C2 == UINT64_C(0xFFFFFFFF) && Subtarget->hasStdExtZba())) + break; + + // We need to shift left the AND input and C1 by a total of XLen bits. + + // How far left do we need to shift the AND input? + unsigned XLen = Subtarget->getXLen(); + unsigned LeadingZeros = XLen - (64 - countLeadingZeros(C2)); + + // The constant gets shifted by the remaining amount unless that would + // shift bits out. + uint64_t C1 = N1C->getZExtValue(); + unsigned ConstantShift = XLen - LeadingZeros; + if (ConstantShift > (XLen - (64 - countLeadingZeros(C1)))) + break; + + uint64_t ShiftedC1 = C1 << ConstantShift; + // If this RV32, we need to sign extend the constant. + if (XLen == 32) + ShiftedC1 = SignExtend64(ShiftedC1, 32); + + // Create (mulhu (slli X, lzcnt(C2)), C1 << (XLen - lzcnt(C2))). + SDNode *Imm = selectImm(CurDAG, DL, ShiftedC1, *Subtarget); + SDNode *SLLI = + CurDAG->getMachineNode(RISCV::SLLI, DL, VT, N0.getOperand(0), + CurDAG->getTargetConstant(LeadingZeros, DL, VT)); + SDNode *MULHU = CurDAG->getMachineNode(RISCV::MULHU, DL, VT, + SDValue(SLLI, 0), SDValue(Imm, 0)); + ReplaceNode(Node, MULHU); + return; + } case ISD::INTRINSIC_WO_CHAIN: { unsigned IntNo = Node->getConstantOperandVal(0); switch (IntNo) { @@ -1450,6 +1515,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, Extract.getNode()); return; } + case ISD::SPLAT_VECTOR: case RISCVISD::VMV_V_X_VL: case RISCVISD::VFMV_V_F_VL: { // Try to match splat of a scalar load to a strided load with stride of x0. 
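Reviewer note: the new `ISD::MUL` case above rests on an arithmetic identity. If C2 is a mask of the low k bits and the product fits in XLen bits, then `(X & C2) * C1` equals the high XLen bits of `(X << (XLen - k)) * (C1 << k)`: the two shifts together push the product up by exactly XLen, and the left shift of X doubles as the AND. A self-contained check of that identity for XLen = 64 (constants are illustrative; `mulhu` is a stand-in for the instruction):

```cpp
#include <cassert>
#include <cstdint>

// High half of an unsigned 64x64 -> 128-bit multiply (GCC/Clang __uint128_t),
// i.e. what the RISC-V MULHU instruction returns.
static uint64_t mulhu(uint64_t A, uint64_t B) {
  return static_cast<uint64_t>((static_cast<__uint128_t>(A) * B) >> 64);
}

int main() {
  const uint64_t C2 = 0xFFFFFF;           // 24-bit mask operand of the AND
  const uint64_t C1 = 100;                // multiply constant
  const unsigned LeadingZeros = 64 - 24;  // lzcnt(C2) for XLen = 64
  const uint64_t X = 0x9ABCDEF012345;     // arbitrary input

  // Original: (mul (and X, C2), C1). Rewritten: SLLI clears the bits above
  // the mask, and with C1 shifted by the remaining amount the full product
  // lands in the high 64 bits, which MULHU reads back.
  uint64_t Expected = (X & C2) * C1;
  uint64_t Got = mulhu(X << LeadingZeros, C1 << (64 - LeadingZeros));
  assert(Got == Expected);
  return 0;
}
```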
@@ -1466,7 +1532,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { break; SDValue VL; - selectVLOp(Node->getOperand(1), VL); + if (Node->getOpcode() == ISD::SPLAT_VECTOR) + VL = CurDAG->getTargetConstant(RISCV::VLMaxSentinel, DL, XLenVT); + else + selectVLOp(Node->getOperand(1), VL); unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); SDValue SEW = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f3331571fc55..4f5512e6fb37 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -330,6 +330,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LLRINT, MVT::f16, Legal); setOperationAction(ISD::LROUND, MVT::f16, Legal); setOperationAction(ISD::LLROUND, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FADD, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::f16, Legal); for (auto CC : FPCCToExpand) setCondCodeAction(CC, MVT::f16, Expand); setOperationAction(ISD::SELECT_CC, MVT::f16, Expand); @@ -367,6 +375,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LLRINT, MVT::f32, Legal); setOperationAction(ISD::LROUND, MVT::f32, Legal); setOperationAction(ISD::LLROUND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); for (auto CC : FPCCToExpand) setCondCodeAction(CC, MVT::f32, Expand); setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); @@ -388,6 +402,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::LLRINT, MVT::f64, Legal); setOperationAction(ISD::LROUND, MVT::f64, Legal); setOperationAction(ISD::LLROUND, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FMA, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); for (auto CC : FPCCToExpand) setCondCodeAction(CC, MVT::f64, Expand); setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); @@ -412,6 +434,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_UINT_SAT, XLenVT, Custom); setOperationAction(ISD::FP_TO_SINT_SAT, XLenVT, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, XLenVT, Legal); + setOperationAction(ISD::STRICT_FP_TO_SINT, XLenVT, Legal); + setOperationAction(ISD::STRICT_UINT_TO_FP, XLenVT, Legal); + setOperationAction(ISD::STRICT_SINT_TO_FP, XLenVT, Legal); + setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom); setOperationAction(ISD::SET_ROUNDING, 
MVT::Other, Custom); } @@ -471,12 +498,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::VP_XOR, ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL, ISD::VP_REDUCE_ADD, ISD::VP_REDUCE_AND, ISD::VP_REDUCE_OR, ISD::VP_REDUCE_XOR, ISD::VP_REDUCE_SMAX, - ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN}; + ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN, + ISD::VP_SELECT}; static const unsigned FloatingPointVPOps[] = { ISD::VP_FADD, ISD::VP_FSUB, ISD::VP_FMUL, ISD::VP_FDIV, ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD, - ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX}; + ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT}; if (!Subtarget.is64Bit()) { // We must custom-lower certain vXi64 operations on RV32 due to the vector @@ -519,6 +547,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::VP_AND, VT, Custom); + setOperationAction(ISD::VP_OR, VT, Custom); + setOperationAction(ISD::VP_XOR, VT, Custom); + setOperationAction(ISD::VECREDUCE_AND, VT, Custom); setOperationAction(ISD::VECREDUCE_OR, VT, Custom); setOperationAction(ISD::VECREDUCE_XOR, VT, Custom); @@ -803,6 +835,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // Operations below are different for between masks and other vectors. if (VT.getVectorElementType() == MVT::i1) { + setOperationAction(ISD::VP_AND, VT, Custom); + setOperationAction(ISD::VP_OR, VT, Custom); + setOperationAction(ISD::VP_XOR, VT, Custom); setOperationAction(ISD::AND, VT, Custom); setOperationAction(ISD::OR, VT, Custom); setOperationAction(ISD::XOR, VT, Custom); @@ -1147,7 +1182,7 @@ bool RISCVTargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasStdExtZbb(); } -bool RISCVTargetLowering::hasAndNot(SDValue Y) const { +bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const { EVT VT = Y.getValueType(); // FIXME: Support vectors once we have tests. @@ -1235,7 +1270,8 @@ bool RISCVTargetLowering::shouldSinkOperands( bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const { - if (VT == MVT::f16 && !Subtarget.hasStdExtZfhmin()) + // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin. + if (VT == MVT::f16 && !Subtarget.hasStdExtZfh()) return false; if (VT == MVT::f32 && !Subtarget.hasStdExtF()) return false; @@ -1255,9 +1291,10 @@ bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const { MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled. + // Use f32 to pass f16 if it is legal and Zfh is not enabled. // We might still end up using a GPR but that will be decided based on ABI. - if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin()) + // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin. + if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) return MVT::f32; return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); @@ -1266,9 +1303,10 @@ MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, unsigned RISCVTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const { - // Use f32 to pass f16 if it is legal and Zfhmin/Zfh is not enabled. + // Use f32 to pass f16 if it is legal and Zfh is not enabled. 
// We might still end up using a GPR but that will be decided based on ABI. - if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfhmin()) + // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin. + if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh()) return 1; return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); @@ -1959,29 +1997,37 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, int64_t StepNumerator = SimpleVID->StepNumerator; unsigned StepDenominator = SimpleVID->StepDenominator; int64_t Addend = SimpleVID->Addend; + + assert(StepNumerator != 0 && "Invalid step"); + bool Negate = false; + int64_t SplatStepVal = StepNumerator; + unsigned StepOpcode = ISD::MUL; + if (StepNumerator != 1) { + if (isPowerOf2_64(std::abs(StepNumerator))) { + Negate = StepNumerator < 0; + StepOpcode = ISD::SHL; + SplatStepVal = Log2_64(std::abs(StepNumerator)); + } + } + // Only emit VIDs with suitably-small steps/addends. We use imm5 is a // threshold since it's the immediate value many RVV instructions accept. - if (isInt<5>(StepNumerator) && isPowerOf2_32(StepDenominator) && - isInt<5>(Addend)) { + // There is no vmul.vi instruction so ensure multiply constant can fit in + // a single addi instruction. + if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) || + (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) && + isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) { SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); // Convert right out of the scalable type so we can use standard ISD // nodes for the rest of the computation. If we used scalable types with // these, we'd lose the fixed-length vector info and generate worse // vsetvli code. VID = convertFromScalableVector(VT, VID, DAG, Subtarget); - assert(StepNumerator != 0 && "Invalid step"); - bool Negate = false; - if (StepNumerator != 1) { - int64_t SplatStepVal = StepNumerator; - unsigned Opcode = ISD::MUL; - if (isPowerOf2_64(std::abs(StepNumerator))) { - Negate = StepNumerator < 0; - Opcode = ISD::SHL; - SplatStepVal = Log2_64(std::abs(StepNumerator)); - } + if ((StepOpcode == ISD::MUL && SplatStepVal != 1) || + (StepOpcode == ISD::SHL && SplatStepVal != 0)) { SDValue SplatStep = DAG.getSplatVector( VT, DL, DAG.getConstant(SplatStepVal, DL, XLenVT)); - VID = DAG.getNode(Opcode, DL, VT, VID, SplatStep); + VID = DAG.getNode(StepOpcode, DL, VT, VID, SplatStep); } if (StepDenominator != 1) { SDValue SplatStep = DAG.getSplatVector( @@ -3133,6 +3179,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return lowerGET_ROUNDING(Op, DAG); case ISD::SET_ROUNDING: return lowerSET_ROUNDING(Op, DAG); + case ISD::VP_SELECT: + return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL); case ISD::VP_ADD: return lowerVPOp(Op, DAG, RISCVISD::ADD_VL); case ISD::VP_SUB: @@ -3148,11 +3196,11 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, case ISD::VP_UREM: return lowerVPOp(Op, DAG, RISCVISD::UREM_VL); case ISD::VP_AND: - return lowerVPOp(Op, DAG, RISCVISD::AND_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMAND_VL, RISCVISD::AND_VL); case ISD::VP_OR: - return lowerVPOp(Op, DAG, RISCVISD::OR_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMOR_VL, RISCVISD::OR_VL); case ISD::VP_XOR: - return lowerVPOp(Op, DAG, RISCVISD::XOR_VL); + return lowerLogicVPOp(Op, DAG, RISCVISD::VMXOR_VL, RISCVISD::XOR_VL); case ISD::VP_ASHR: return lowerVPOp(Op, DAG, RISCVISD::SRA_VL); case ISD::VP_LSHR: @@ -4469,19 +4517,19 @@ SDValue 
RISCVTargetLowering::lowerVECREDUCE(SDValue Op, } MVT M1VT = getLMUL1VT(ContainerVT); + MVT XLenVT = Subtarget.getXLenVT(); SDValue Mask, VL; std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); - // FIXME: This is a VLMAX splat which might be too large and can prevent - // vsetvli removal. SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags()); - SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem); + SDValue IdentitySplat = lowerScalarSplat( + NeutralElem, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), Vec, IdentitySplat, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, - DAG.getConstant(0, DL, Subtarget.getXLenVT())); + DAG.getConstant(0, DL, XLenVT)); return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); } @@ -4497,9 +4545,12 @@ getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) { switch (Opcode) { default: llvm_unreachable("Unhandled reduction"); - case ISD::VECREDUCE_FADD: - return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), - DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags)); + case ISD::VECREDUCE_FADD: { + // Use positive zero if we can. It is cheaper to materialize. + SDValue Zero = + DAG.getConstantFP(Flags.hasNoSignedZeros() ? 0.0 : -0.0, DL, EltVT); + return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0), Zero); + } case ISD::VECREDUCE_SEQ_FADD: return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1), Op.getOperand(0)); @@ -4530,17 +4581,17 @@ SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op, } MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType()); + MVT XLenVT = Subtarget.getXLenVT(); SDValue Mask, VL; std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget); - // FIXME: This is a VLMAX splat which might be too large and can prevent - // vsetvli removal. - SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal); + SDValue ScalarSplat = lowerScalarSplat( + ScalarVal, DAG.getConstant(1, DL, XLenVT), M1VT, DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, DAG.getUNDEF(M1VT), VectorVal, ScalarSplat, Mask, VL); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction, - DAG.getConstant(0, DL, Subtarget.getXLenVT())); + DAG.getConstant(0, DL, XLenVT)); } static unsigned getRVVVPReductionOp(unsigned ISDOpcode) { @@ -4602,13 +4653,13 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op, MVT XLenVT = Subtarget.getXLenVT(); MVT ResVT = !VecVT.isInteger() || VecEltVT.bitsGE(XLenVT) ? VecEltVT : XLenVT; - // FIXME: This is a VLMAX splat which might be too large and can prevent - // vsetvli removal. 
- SDValue StartSplat = DAG.getSplatVector(M1VT, DL, Op.getOperand(0)); + SDValue StartSplat = + lowerScalarSplat(Op.getOperand(0), DAG.getConstant(1, DL, XLenVT), M1VT, + DL, DAG, Subtarget); SDValue Reduction = DAG.getNode(RVVOpcode, DL, M1VT, StartSplat, Vec, StartSplat, Mask, VL); SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Reduction, - DAG.getConstant(0, DL, Subtarget.getXLenVT())); + DAG.getConstant(0, DL, XLenVT)); if (!VecVT.isInteger()) return Elt0; return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType()); @@ -5365,6 +5416,33 @@ SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG, return convertFromScalableVector(VT, VPOp, DAG, Subtarget); } +SDValue RISCVTargetLowering::lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, + unsigned MaskOpc, + unsigned VecOpc) const { + MVT VT = Op.getSimpleValueType(); + if (VT.getVectorElementType() != MVT::i1) + return lowerVPOp(Op, DAG, VecOpc); + + // It is safe to drop mask parameter as masked-off elements are undef. + SDValue Op1 = Op->getOperand(0); + SDValue Op2 = Op->getOperand(1); + SDValue VL = Op->getOperand(3); + + MVT ContainerVT = VT; + const bool IsFixed = VT.isFixedLengthVector(); + if (IsFixed) { + ContainerVT = getContainerForFixedLengthVector(VT); + Op1 = convertToScalableVector(ContainerVT, Op1, DAG, Subtarget); + Op2 = convertToScalableVector(ContainerVT, Op2, DAG, Subtarget); + } + + SDLoc DL(Op); + SDValue Val = DAG.getNode(MaskOpc, DL, ContainerVT, Op1, Op2, VL); + if (!IsFixed) + return Val; + return convertFromScalableVector(VT, Val, DAG, Subtarget); +} + // Custom lower MGATHER/VP_GATHER to a legalized form for RVV. It will then be // matched to a RVV indexed load. The RVV indexed load instructions only // support the "unsigned unscaled" addressing mode; indices are implicitly @@ -5695,11 +5773,17 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0); if (getTypeAction(*DAG.getContext(), Op0.getValueType()) != TargetLowering::TypeSoftenFloat) { - // FIXME: Support strict FP. - if (IsStrict) - return; if (!isTypeLegal(Op0.getValueType())) return; + if (IsStrict) { + unsigned Opc = IsSigned ? RISCVISD::STRICT_FCVT_W_RTZ_RV64 + : RISCVISD::STRICT_FCVT_WU_RTZ_RV64; + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + SDValue Res = DAG.getNode(Opc, DL, VTs, N->getOperand(0), Op0); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); + Results.push_back(Res.getValue(1)); + return; + } unsigned Opc = IsSigned ? RISCVISD::FCVT_W_RTZ_RV64 : RISCVISD::FCVT_WU_RTZ_RV64; SDValue Res = DAG.getNode(Opc, DL, MVT::i64, Op0); @@ -7026,7 +7110,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, if (SimplifyDemandedLowBitsHelper(1, Log2_32(BitWidth))) return SDValue(N, 0); - return combineGREVI_GORCI(N, DCI.DAG); + return combineGREVI_GORCI(N, DAG); } case RISCVISD::GREVW: case RISCVISD::GORCW: { @@ -7035,7 +7119,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, SimplifyDemandedLowBitsHelper(1, 5)) return SDValue(N, 0); - return combineGREVI_GORCI(N, DCI.DAG); + return combineGREVI_GORCI(N, DAG); } case RISCVISD::SHFL: case RISCVISD::UNSHFL: { @@ -7120,11 +7204,23 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, // Fold (zero_extend (fp_to_uint X)) to prevent forming fcvt+zexti32 during // type legalization. This is safe because fp_to_uint produces poison if // it overflows. 
- if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit() && - N->getOperand(0).getOpcode() == ISD::FP_TO_UINT && - isTypeLegal(N->getOperand(0).getOperand(0).getValueType())) - return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, - N->getOperand(0).getOperand(0)); + if (N->getValueType(0) == MVT::i64 && Subtarget.is64Bit()) { + SDValue Src = N->getOperand(0); + if (Src.getOpcode() == ISD::FP_TO_UINT && + isTypeLegal(Src.getOperand(0).getValueType())) + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), MVT::i64, + Src.getOperand(0)); + if (Src.getOpcode() == ISD::STRICT_FP_TO_UINT && Src.hasOneUse() && + isTypeLegal(Src.getOperand(1).getValueType())) { + SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); + SDValue Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, SDLoc(N), VTs, + Src.getOperand(0), Src.getOperand(1)); + DCI.CombineTo(N, Res); + DAG.ReplaceAllUsesOfValueWith(Src.getValue(1), Res.getValue(1)); + DCI.recursivelyDeleteUnusedNodes(Src.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked. + } + } return SDValue(); case RISCVISD::SELECT_CC: { // Transform @@ -7685,6 +7781,8 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::BDECOMPRESSW: case RISCVISD::FCVT_W_RTZ_RV64: case RISCVISD::FCVT_WU_RTZ_RV64: + case RISCVISD::STRICT_FCVT_W_RTZ_RV64: + case RISCVISD::STRICT_FCVT_WU_RTZ_RV64: // TODO: As the result is sign-extended, this is conservatively correct. A // more precise answer could be calculated for SRAW depending on known // bits in the shift amount. @@ -8004,6 +8102,22 @@ RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, } } +void RISCVTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const { + // Add FRM dependency to any instructions with dynamic rounding mode. + unsigned Opc = MI.getOpcode(); + auto Idx = RISCV::getNamedOperandIdx(Opc, RISCV::OpName::frm); + if (Idx < 0) + return; + if (MI.getOperand(Idx).getImm() != RISCVFPRndMode::DYN) + return; + // If the instruction already reads FRM, don't add another read. + if (MI.readsRegister(RISCV::FRM)) + return; + MI.addOperand( + MachineOperand::CreateReg(RISCV::FRM, /*isDef*/ false, /*isImp*/ true)); +} + // Calling Convention Implementation. // The expectations for frontend ABI lowering vary from target to target. // Ideally, an LLVM frontend would be able to avoid worrying about many ABI @@ -9400,6 +9514,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(FCVT_XU_RTZ) NODE_NAME_CASE(FCVT_W_RTZ_RV64) NODE_NAME_CASE(FCVT_WU_RTZ_RV64) + NODE_NAME_CASE(STRICT_FCVT_W_RTZ_RV64) + NODE_NAME_CASE(STRICT_FCVT_WU_RTZ_RV64) NODE_NAME_CASE(READ_CYCLE_WIDE) NODE_NAME_CASE(GREV) NODE_NAME_CASE(GREVW) @@ -9541,6 +9657,9 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': + // TODO: Support fixed vectors up to XLen for P extension? 
+ if (VT.isVector()) + break; return std::make_pair(0U, &RISCV::GPRRegClass); case 'f': if (Subtarget.hasStdExtZfh() && VT == MVT::f16) @@ -9553,17 +9672,15 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, default: break; } - } else { - if (Constraint == "vr") { - for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, - &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { - if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) - return std::make_pair(0U, RC); - } - } else if (Constraint == "vm") { - if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy)) - return std::make_pair(0U, &RISCV::VMRegClass); + } else if (Constraint == "vr") { + for (const auto *RC : {&RISCV::VRRegClass, &RISCV::VRM2RegClass, + &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) { + if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) + return std::make_pair(0U, RC); } + } else if (Constraint == "vm") { + if (TRI->isTypeLegalForClass(RISCV::VMV0RegClass, VT.SimpleTy)) + return std::make_pair(0U, &RISCV::VMV0RegClass); } // Clang will correctly decode the usage of register name aliases into their @@ -10101,17 +10218,29 @@ bool RISCVTargetLowering::splitValueIntoRegisterParts( unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize(); unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize(); if (PartVTBitSize % ValueVTBitSize == 0) { + assert(PartVTBitSize >= ValueVTBitSize); // If the element types are different, bitcast to the same element type of // PartVT first. + // Give an example here, we want copy a <vscale x 1 x i8> value to + // <vscale x 4 x i16>. + // We need to convert <vscale x 1 x i8> to <vscale x 8 x i8> by insert + // subvector, then we can bitcast to <vscale x 4 x i16>. if (ValueEltVT != PartEltVT) { - unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits(); - assert(Count != 0 && "The number of element should not be zero."); - EVT SameEltTypeVT = - EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true); - Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val); + if (PartVTBitSize > ValueVTBitSize) { + unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); + assert(Count != 0 && "The number of element should not be zero."); + EVT SameEltTypeVT = + EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true); + Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SameEltTypeVT, + DAG.getUNDEF(SameEltTypeVT), Val, + DAG.getVectorIdxConstant(0, DL)); + } + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } else { + Val = + DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), + Val, DAG.getVectorIdxConstant(0, DL)); } - Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT), - Val, DAG.getConstant(0, DL, Subtarget.getXLenVT())); Parts[0] = Val; return true; } @@ -10141,19 +10270,23 @@ SDValue RISCVTargetLowering::joinRegisterPartsIntoValue( unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize(); unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize(); if (PartVTBitSize % ValueVTBitSize == 0) { + assert(PartVTBitSize >= ValueVTBitSize); EVT SameEltTypeVT = ValueVT; // If the element types are different, convert it to the same element type // of PartVT. + // Give an example here, we want copy a <vscale x 1 x i8> value from + // <vscale x 4 x i16>. + // We need to convert <vscale x 4 x i16> to <vscale x 8 x i8> first, + // then we can extract <vscale x 1 x i8>. 
if (ValueEltVT != PartEltVT) { - unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits(); + unsigned Count = PartVTBitSize / ValueEltVT.getFixedSizeInBits(); assert(Count != 0 && "The number of element should not be zero."); SameEltTypeVT = - EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true); + EVT::getVectorVT(Context, ValueEltVT, Count, /*IsScalable=*/true); + Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val); } - Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val, - DAG.getConstant(0, DL, Subtarget.getXLenVT())); - if (ValueEltVT != PartEltVT) - Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getVectorIdxConstant(0, DL)); return Val; } } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 849928eb46ae..48c5ce730933 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -282,6 +282,11 @@ enum NodeType : unsigned { // the value read before the modification and the new chain pointer. SWAP_CSR, + // FP to 32 bit int conversions for RV64. These are used to keep track of the + // result being sign extended to 64 bit. These saturate out of range inputs. + STRICT_FCVT_W_RTZ_RV64 = ISD::FIRST_TARGET_STRICTFP_OPCODE, + STRICT_FCVT_WU_RTZ_RV64, + // Memory opcodes start here. VLE_VL = ISD::FIRST_TARGET_MEMORY_OPCODE, VSE_VL, @@ -315,7 +320,7 @@ public: bool isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const override; bool isCheapToSpeculateCttz() const override; bool isCheapToSpeculateCtlz() const override; - bool hasAndNot(SDValue Y) const override; + bool hasAndNotCompare(SDValue Y) const override; bool shouldSinkOperands(Instruction *I, SmallVectorImpl<Use *> &Ops) const override; bool isFPImmLegal(const APFloat &Imm, EVT VT, @@ -383,6 +388,9 @@ public: EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; + void AdjustInstrPostInstrSelection(MachineInstr &MI, + SDNode *Node) const override; + EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override; @@ -593,6 +601,8 @@ private: SDValue lowerToScalableOp(SDValue Op, SelectionDAG &DAG, unsigned NewOpc, bool HasMask = true) const; SDValue lowerVPOp(SDValue Op, SelectionDAG &DAG, unsigned RISCVISDOpc) const; + SDValue lowerLogicVPOp(SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, + unsigned VecOpc) const; SDValue lowerFixedLengthVectorExtendToRVV(SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const; SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index cfad4cdb9364..6a16b6354f95 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -107,31 +107,44 @@ def Vcompress : RISCVVConstraint<!or(VS2Constraint.Value, // The following opcode names match those given in Table 19.1 in the // RISC-V User-level ISA specification ("RISC-V base opcode map"). 
-class RISCVOpcode<bits<7> val> { +class RISCVOpcode<string name, bits<7> val> { + string Name = name; bits<7> Value = val; } -def OPC_LOAD : RISCVOpcode<0b0000011>; -def OPC_LOAD_FP : RISCVOpcode<0b0000111>; -def OPC_MISC_MEM : RISCVOpcode<0b0001111>; -def OPC_OP_IMM : RISCVOpcode<0b0010011>; -def OPC_AUIPC : RISCVOpcode<0b0010111>; -def OPC_OP_IMM_32 : RISCVOpcode<0b0011011>; -def OPC_STORE : RISCVOpcode<0b0100011>; -def OPC_STORE_FP : RISCVOpcode<0b0100111>; -def OPC_AMO : RISCVOpcode<0b0101111>; -def OPC_OP : RISCVOpcode<0b0110011>; -def OPC_LUI : RISCVOpcode<0b0110111>; -def OPC_OP_32 : RISCVOpcode<0b0111011>; -def OPC_MADD : RISCVOpcode<0b1000011>; -def OPC_MSUB : RISCVOpcode<0b1000111>; -def OPC_NMSUB : RISCVOpcode<0b1001011>; -def OPC_NMADD : RISCVOpcode<0b1001111>; -def OPC_OP_FP : RISCVOpcode<0b1010011>; -def OPC_OP_V : RISCVOpcode<0b1010111>; -def OPC_BRANCH : RISCVOpcode<0b1100011>; -def OPC_JALR : RISCVOpcode<0b1100111>; -def OPC_JAL : RISCVOpcode<0b1101111>; -def OPC_SYSTEM : RISCVOpcode<0b1110011>; +def RISCVOpcodesList : GenericTable { + let FilterClass = "RISCVOpcode"; + let Fields = [ + "Name", "Value" + ]; + let PrimaryKey = [ "Value" ]; + let PrimaryKeyName = "lookupRISCVOpcodeByValue"; +} +def lookupRISCVOpcodeByName : SearchIndex { + let Table = RISCVOpcodesList; + let Key = [ "Name" ]; +} +def OPC_LOAD : RISCVOpcode<"LOAD", 0b0000011>; +def OPC_LOAD_FP : RISCVOpcode<"LOAD_FP", 0b0000111>; +def OPC_MISC_MEM : RISCVOpcode<"MISC_MEM", 0b0001111>; +def OPC_OP_IMM : RISCVOpcode<"OP_IMM", 0b0010011>; +def OPC_AUIPC : RISCVOpcode<"AUIPC", 0b0010111>; +def OPC_OP_IMM_32 : RISCVOpcode<"OP_IMM_32", 0b0011011>; +def OPC_STORE : RISCVOpcode<"STORE", 0b0100011>; +def OPC_STORE_FP : RISCVOpcode<"STORE_FP", 0b0100111>; +def OPC_AMO : RISCVOpcode<"AMO", 0b0101111>; +def OPC_OP : RISCVOpcode<"OP", 0b0110011>; +def OPC_LUI : RISCVOpcode<"LUI", 0b0110111>; +def OPC_OP_32 : RISCVOpcode<"OP_32", 0b0111011>; +def OPC_MADD : RISCVOpcode<"MADD", 0b1000011>; +def OPC_MSUB : RISCVOpcode<"MSUB", 0b1000111>; +def OPC_NMSUB : RISCVOpcode<"NMSUB", 0b1001011>; +def OPC_NMADD : RISCVOpcode<"NMADD", 0b1001111>; +def OPC_OP_FP : RISCVOpcode<"OP_FP", 0b1010011>; +def OPC_OP_V : RISCVOpcode<"OP_V", 0b1010111>; +def OPC_BRANCH : RISCVOpcode<"BRANCH", 0b1100011>; +def OPC_JALR : RISCVOpcode<"JALR", 0b1100111>; +def OPC_JAL : RISCVOpcode<"JAL", 0b1101111>; +def OPC_SYSTEM : RISCVOpcode<"SYSTEM", 0b1110011>; class RVInst<dag outs, dag ins, string opcodestr, string argstr, list<dag> pattern, InstFormat format> @@ -188,8 +201,7 @@ class RVInst<dag outs, dag ins, string opcodestr, string argstr, // Pseudo instructions class Pseudo<dag outs, dag ins, list<dag> pattern, string opcodestr = "", string argstr = ""> - : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo>, - Sched<[]> { + : RVInst<outs, ins, opcodestr, argstr, pattern, InstFormatPseudo> { let isPseudo = 1; let isCodeGenOnly = 1; } @@ -265,14 +277,14 @@ class RVInstR4Frm<bits<2> funct2, RISCVOpcode opcode, dag outs, dag ins, bits<5> rs3; bits<5> rs2; bits<5> rs1; - bits<3> funct3; + bits<3> frm; bits<5> rd; let Inst{31-27} = rs3; let Inst{26-25} = funct2; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = funct3; + let Inst{14-12} = frm; let Inst{11-7} = rd; let Opcode = opcode.Value; } @@ -300,13 +312,13 @@ class RVInstRFrm<bits<7> funct7, RISCVOpcode opcode, dag outs, dag ins, : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { bits<5> rs2; bits<5> rs1; - bits<3> funct3; + bits<3> frm; bits<5> rd; let Inst{31-25} = 
funct7; let Inst{24-20} = rs2; let Inst{19-15} = rs1; - let Inst{14-12} = funct3; + let Inst{14-12} = frm; let Inst{11-7} = rd; let Opcode = opcode.Value; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td index 80f46b73bfd7..69e9d3553b30 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsV.td @@ -45,19 +45,6 @@ def SUMOPUnitStride : RISCVLSUMOP<0b00000>; def SUMOPUnitStrideMask : RISCVLSUMOP<0b01011>; def SUMOPUnitStrideWholeReg : RISCVLSUMOP<0b01000>; -class RISCVAMOOP<bits<5> val> { - bits<5> Value = val; -} -def AMOOPVamoSwap : RISCVAMOOP<0b00001>; -def AMOOPVamoAdd : RISCVAMOOP<0b00000>; -def AMOOPVamoXor : RISCVAMOOP<0b00100>; -def AMOOPVamoAnd : RISCVAMOOP<0b01100>; -def AMOOPVamoOr : RISCVAMOOP<0b01000>; -def AMOOPVamoMin : RISCVAMOOP<0b10000>; -def AMOOPVamoMax : RISCVAMOOP<0b10100>; -def AMOOPVamoMinu : RISCVAMOOP<0b11000>; -def AMOOPVamoMaxu : RISCVAMOOP<0b11100>; - class RISCVWidth<bits<4> val> { bits<4> Value = val; } @@ -342,22 +329,3 @@ class RVInstVSX<bits<3> nf, bit mew, RISCVMOP mop, bits<3> width, let Uses = [VTYPE, VL]; } - -class RVInstVAMO<RISCVAMOOP amoop, bits<3> width, dag outs, - dag ins, string opcodestr, string argstr> - : RVInst<outs, ins, opcodestr, argstr, [], InstFormatR> { - bits<5> vs2; - bits<5> rs1; - bit wd; - bit vm; - - let Inst{31-27} = amoop.Value; - let Inst{26} = wd; - let Inst{25} = vm; - let Inst{24-20} = vs2; - let Inst{19-15} = rs1; - let Inst{14-12} = width; - let Opcode = OPC_AMO.Value; - - let Uses = [VTYPE, VL]; -} diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 547d82550cac..2e2e00886d57 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -35,6 +35,7 @@ using namespace llvm; #include "RISCVGenCompressInstEmitter.inc" #define GET_INSTRINFO_CTOR_DTOR +#define GET_INSTRINFO_NAMED_OPS #include "RISCVGenInstrInfo.inc" static cl::opt<bool> PreferWholeRegisterMove( @@ -1059,6 +1060,7 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { break; case RISCV::FSGNJ_D: case RISCV::FSGNJ_S: + case RISCV::FSGNJ_H: // The canonical floating-point move is fsgnj rd, rs, rs. return MI.getOperand(1).isReg() && MI.getOperand(2).isReg() && MI.getOperand(1).getReg() == MI.getOperand(2).getReg(); @@ -1087,6 +1089,7 @@ RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const { break; case RISCV::FSGNJ_D: case RISCV::FSGNJ_S: + case RISCV::FSGNJ_H: // The canonical floating-point move is fsgnj rd, rs, rs. if (MI.getOperand(1).isReg() && MI.getOperand(2).isReg() && MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) @@ -1254,7 +1257,7 @@ bool RISCVInstrInfo::isFunctionSafeToOutlineFrom( bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, unsigned &Flags) const { // More accurate safety checking is done in getOutliningCandidateInfo. - return true; + return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags); } // Enum values indicating how an outlined call should be constructed. 
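Reviewer note: the RISCVInstrInfo.cpp hunks above extend the "canonical floating-point move" recognition to FSGNJ_H. The reason `fsgnj rd, rs, rs` is a move is plain sign-injection semantics: the result takes rs1's magnitude and rs2's sign, so with rs1 == rs2 it reproduces the input exactly. A small illustration using C++'s `copysign`, which has the same semantics for non-NaN values (a sketch, not the instruction's full bit-level definition):

```cpp
#include <cassert>
#include <cmath>

// fsgnj.{h,s,d} rd, rs1, rs2 writes rs1's magnitude with rs2's sign, so
// "fsgnj rd, rs, rs" is an exact register copy -- the canonical FP move now
// also recognized for the half-precision FSGNJ_H in the hunk above.
static double fsgnj(double Rs1, double Rs2) { return std::copysign(Rs1, Rs2); }

int main() {
  for (double X : {1.5, -3.25, 0.0, -0.0}) {
    double Copy = fsgnj(X, X);
    assert(Copy == X && std::signbit(Copy) == std::signbit(X));
  }
  return 0;
}
```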
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 2bfad7844c43..da0877c4299a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -18,6 +18,7 @@ #include "llvm/IR/DiagnosticInfo.h" #define GET_INSTRINFO_HEADER +#define GET_INSTRINFO_OPERAND_ENUM #include "RISCVGenInstrInfo.inc" namespace llvm { @@ -181,6 +182,10 @@ protected: }; namespace RISCV { + +// Implemented in RISCVGenInstrInfo.inc +int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex); + // Special immediate for AVL operand of V pseudo instructions to indicate VLMax. static constexpr int64_t VLMaxSentinel = -1LL; } // namespace RISCV diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 6f9cde966132..71eb6f01a4f4 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -174,6 +174,20 @@ def uimm5 : Operand<XLenVT>, ImmLeaf<XLenVT, [{return isUInt<5>(Imm);}]> { let OperandNamespace = "RISCVOp"; } +def InsnDirectiveOpcode : AsmOperandClass { + let Name = "InsnDirectiveOpcode"; + let ParserMethod = "parseInsnDirectiveOpcode"; + let RenderMethod = "addImmOperands"; + let PredicateMethod = "isImm"; +} + +def uimm7_opcode : Operand<XLenVT> { + let ParserMatchClass = InsnDirectiveOpcode; + let DecoderMethod = "decodeUImmOperand<7>"; + let OperandType = "OPERAND_UIMM7"; + let OperandNamespace = "RISCVOp"; +} + def uimm7 : Operand<XLenVT> { let ParserMatchClass = UImmAsmOperand<7>; let DecoderMethod = "decodeUImmOperand<7>"; @@ -878,35 +892,35 @@ def : InstAlias<"zext.b $rd, $rs", (ANDI GPR:$rd, GPR:$rs, 0xFF), 0>; // isCodeGenOnly = 1 to hide them from the tablegened assembly parser. let isCodeGenOnly = 1, hasSideEffects = 1, mayLoad = 1, mayStore = 1, hasNoSchedulingInfo = 1 in { -def InsnR : DirectiveInsnR<(outs AnyReg:$rd), (ins uimm7:$opcode, uimm3:$funct3, +def InsnR : DirectiveInsnR<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode, uimm3:$funct3, uimm7:$funct7, AnyReg:$rs1, AnyReg:$rs2), "$opcode, $funct3, $funct7, $rd, $rs1, $rs2">; -def InsnR4 : DirectiveInsnR4<(outs AnyReg:$rd), (ins uimm7:$opcode, +def InsnR4 : DirectiveInsnR4<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode, uimm3:$funct3, uimm2:$funct2, AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3), "$opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3">; -def InsnI : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7:$opcode, uimm3:$funct3, +def InsnI : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1, simm12:$imm12), "$opcode, $funct3, $rd, $rs1, $imm12">; -def InsnI_Mem : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7:$opcode, +def InsnI_Mem : DirectiveInsnI<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1, simm12:$imm12), "$opcode, $funct3, $rd, ${imm12}(${rs1})">; -def InsnB : DirectiveInsnB<(outs), (ins uimm7:$opcode, uimm3:$funct3, +def InsnB : DirectiveInsnB<(outs), (ins uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1, AnyReg:$rs2, simm13_lsb0:$imm12), "$opcode, $funct3, $rs1, $rs2, $imm12">; -def InsnU : DirectiveInsnU<(outs AnyReg:$rd), (ins uimm7:$opcode, +def InsnU : DirectiveInsnU<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode, uimm20_lui:$imm20), "$opcode, $rd, $imm20">; -def InsnJ : DirectiveInsnJ<(outs AnyReg:$rd), (ins uimm7:$opcode, +def InsnJ : DirectiveInsnJ<(outs AnyReg:$rd), (ins uimm7_opcode:$opcode, simm21_lsb0_jal:$imm20), "$opcode, $rd, $imm20">; -def InsnS : DirectiveInsnS<(outs), (ins uimm7:$opcode, uimm3:$funct3, +def 
InsnS : DirectiveInsnS<(outs), (ins uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs2, AnyReg:$rs1, simm12:$imm12), "$opcode, $funct3, $rs2, ${imm12}(${rs1})">; @@ -918,37 +932,37 @@ def InsnS : DirectiveInsnS<(outs), (ins uimm7:$opcode, uimm3:$funct3, // for known formats. let EmitPriority = 0 in { def : InstAlias<".insn_r $opcode, $funct3, $funct7, $rd, $rs1, $rs2", - (InsnR AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm7:$funct7, + (InsnR AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, uimm7:$funct7, AnyReg:$rs1, AnyReg:$rs2)>; // Accept 4 register form of ".insn r" as alias for ".insn r4". def : InstAlias<".insn_r $opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3", - (InsnR4 AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm2:$funct2, + (InsnR4 AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, uimm2:$funct2, AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3)>; def : InstAlias<".insn_r4 $opcode, $funct3, $funct2, $rd, $rs1, $rs2, $rs3", - (InsnR4 AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, uimm2:$funct2, + (InsnR4 AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, uimm2:$funct2, AnyReg:$rs1, AnyReg:$rs2, AnyReg:$rs3)>; def : InstAlias<".insn_i $opcode, $funct3, $rd, $rs1, $imm12", - (InsnI AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1, + (InsnI AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1, simm12:$imm12)>; def : InstAlias<".insn_i $opcode, $funct3, $rd, ${imm12}(${rs1})", - (InsnI_Mem AnyReg:$rd, uimm7:$opcode, uimm3:$funct3, + (InsnI_Mem AnyReg:$rd, uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1, simm12:$imm12)>; def : InstAlias<".insn_b $opcode, $funct3, $rs1, $rs2, $imm12", - (InsnB uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1, + (InsnB uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1, AnyReg:$rs2, simm13_lsb0:$imm12)>; // Accept sb as an alias for b. def : InstAlias<".insn_sb $opcode, $funct3, $rs1, $rs2, $imm12", - (InsnB uimm7:$opcode, uimm3:$funct3, AnyReg:$rs1, + (InsnB uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs1, AnyReg:$rs2, simm13_lsb0:$imm12)>; def : InstAlias<".insn_u $opcode, $rd, $imm20", - (InsnU AnyReg:$rd, uimm7:$opcode, uimm20_lui:$imm20)>; + (InsnU AnyReg:$rd, uimm7_opcode:$opcode, uimm20_lui:$imm20)>; def : InstAlias<".insn_j $opcode, $rd, $imm20", - (InsnJ AnyReg:$rd, uimm7:$opcode, simm21_lsb0_jal:$imm20)>; + (InsnJ AnyReg:$rd, uimm7_opcode:$opcode, simm21_lsb0_jal:$imm20)>; // Accept uj as an alias for j. 
def : InstAlias<".insn_uj $opcode, $rd, $imm20", - (InsnJ AnyReg:$rd, uimm7:$opcode, simm21_lsb0_jal:$imm20)>; + (InsnJ AnyReg:$rd, uimm7_opcode:$opcode, simm21_lsb0_jal:$imm20)>; def : InstAlias<".insn_s $opcode, $funct3, $rs2, ${imm12}(${rs1})", - (InsnS uimm7:$opcode, uimm3:$funct3, AnyReg:$rs2, + (InsnS uimm7_opcode:$opcode, uimm3:$funct3, AnyReg:$rs2, AnyReg:$rs1, simm12:$imm12)>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 2cd011a02345..d6c31c4804db 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -26,41 +26,6 @@ def RISCVBuildPairF64 : SDNode<"RISCVISD::BuildPairF64", SDT_RISCVBuildPairF64>; def RISCVSplitF64 : SDNode<"RISCVISD::SplitF64", SDT_RISCVSplitF64>; //===----------------------------------------------------------------------===// -// Instruction Class Templates -//===----------------------------------------------------------------------===// - -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPFMAD_rrr_frm<RISCVOpcode opcode, string opcodestr> - : RVInstR4Frm<0b01, opcode, (outs FPR64:$rd), - (ins FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, frmarg:$funct3), - opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">; - -class FPFMADDynFrmAlias<FPFMAD_rrr_frm Inst, string OpcodeStr> - : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3", - (Inst FPR64:$rd, FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; - -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPALUD_rr<bits<7> funct7, bits<3> funct3, string opcodestr> - : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR64:$rd), - (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">; - -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPALUD_rr_frm<bits<7> funct7, string opcodestr> - : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR64:$rd), - (ins FPR64:$rs1, FPR64:$rs2, frmarg:$funct3), opcodestr, - "$rd, $rs1, $rs2, $funct3">; - -class FPALUDDynFrmAlias<FPALUD_rr_frm Inst, string OpcodeStr> - : InstAlias<OpcodeStr#" $rd, $rs1, $rs2", - (Inst FPR64:$rd, FPR64:$rs1, FPR64:$rs2, 0b111)>; - -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPCmpD_rr<bits<3> funct3, string opcodestr> - : RVInstR<0b1010001, funct3, OPC_OP_FP, (outs GPR:$rd), - (ins FPR64:$rs1, FPR64:$rs2), opcodestr, "$rd, $rs1, $rs2">, - Sched<[WriteFCmp64, ReadFCmp64, ReadFCmp64]>; - -//===----------------------------------------------------------------------===// // Instructions //===----------------------------------------------------------------------===// @@ -81,126 +46,104 @@ def FSD : RVInstS<0b011, OPC_STORE_FP, (outs), "fsd", "$rs2, ${imm12}(${rs1})">, Sched<[WriteFST64, ReadStoreData, ReadFMemBase]>; -def FMADD_D : FPFMAD_rrr_frm<OPC_MADD, "fmadd.d">, - Sched<[WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64]>; -def : FPFMADDynFrmAlias<FMADD_D, "fmadd.d">; -def FMSUB_D : FPFMAD_rrr_frm<OPC_MSUB, "fmsub.d">, - Sched<[WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64]>; -def : FPFMADDynFrmAlias<FMSUB_D, "fmsub.d">; -def FNMSUB_D : FPFMAD_rrr_frm<OPC_NMSUB, "fnmsub.d">, - Sched<[WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64]>; -def : FPFMADDynFrmAlias<FNMSUB_D, "fnmsub.d">; -def FNMADD_D : FPFMAD_rrr_frm<OPC_NMADD, "fnmadd.d">, - Sched<[WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64]>; -def : FPFMADDynFrmAlias<FNMADD_D, "fnmadd.d">; - -def FADD_D : FPALUD_rr_frm<0b0000001, "fadd.d">, +let SchedRW = [WriteFMA64, ReadFMA64, ReadFMA64, ReadFMA64] in { +def FMADD_D : FPFMA_rrr_frm<OPC_MADD, 0b01, "fmadd.d", FPR64>; +def FMSUB_D : FPFMA_rrr_frm<OPC_MSUB, 
0b01, "fmsub.d", FPR64>; +def FNMSUB_D : FPFMA_rrr_frm<OPC_NMSUB, 0b01, "fnmsub.d", FPR64>; +def FNMADD_D : FPFMA_rrr_frm<OPC_NMADD, 0b01, "fnmadd.d", FPR64>; +} + +def : FPFMADynFrmAlias<FMADD_D, "fmadd.d", FPR64>; +def : FPFMADynFrmAlias<FMSUB_D, "fmsub.d", FPR64>; +def : FPFMADynFrmAlias<FNMSUB_D, "fnmsub.d", FPR64>; +def : FPFMADynFrmAlias<FNMADD_D, "fnmadd.d", FPR64>; + +def FADD_D : FPALU_rr_frm<0b0000001, "fadd.d", FPR64>, Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; -def : FPALUDDynFrmAlias<FADD_D, "fadd.d">; -def FSUB_D : FPALUD_rr_frm<0b0000101, "fsub.d">, +def FSUB_D : FPALU_rr_frm<0b0000101, "fsub.d", FPR64>, Sched<[WriteFALU64, ReadFALU64, ReadFALU64]>; -def : FPALUDDynFrmAlias<FSUB_D, "fsub.d">; -def FMUL_D : FPALUD_rr_frm<0b0001001, "fmul.d">, +def FMUL_D : FPALU_rr_frm<0b0001001, "fmul.d", FPR64>, Sched<[WriteFMul64, ReadFMul64, ReadFMul64]>; -def : FPALUDDynFrmAlias<FMUL_D, "fmul.d">; -def FDIV_D : FPALUD_rr_frm<0b0001101, "fdiv.d">, +def FDIV_D : FPALU_rr_frm<0b0001101, "fdiv.d", FPR64>, Sched<[WriteFDiv64, ReadFDiv64, ReadFDiv64]>; -def : FPALUDDynFrmAlias<FDIV_D, "fdiv.d">; -def FSQRT_D : FPUnaryOp_r_frm<0b0101101, FPR64, FPR64, "fsqrt.d">, - Sched<[WriteFSqrt64, ReadFSqrt64]> { - let rs2 = 0b00000; -} +def : FPALUDynFrmAlias<FADD_D, "fadd.d", FPR64>; +def : FPALUDynFrmAlias<FSUB_D, "fsub.d", FPR64>; +def : FPALUDynFrmAlias<FMUL_D, "fmul.d", FPR64>; +def : FPALUDynFrmAlias<FDIV_D, "fdiv.d", FPR64>; + +def FSQRT_D : FPUnaryOp_r_frm<0b0101101, 0b00000, FPR64, FPR64, "fsqrt.d">, + Sched<[WriteFSqrt64, ReadFSqrt64]>; def : FPUnaryOpDynFrmAlias<FSQRT_D, "fsqrt.d", FPR64, FPR64>; -def FSGNJ_D : FPALUD_rr<0b0010001, 0b000, "fsgnj.d">, - Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>; -def FSGNJN_D : FPALUD_rr<0b0010001, 0b001, "fsgnjn.d">, - Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>; -def FSGNJX_D : FPALUD_rr<0b0010001, 0b010, "fsgnjx.d">, - Sched<[WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64]>; -def FMIN_D : FPALUD_rr<0b0010101, 0b000, "fmin.d">, - Sched<[WriteFMinMax64, ReadFMinMax64, ReadFMinMax64]>; -def FMAX_D : FPALUD_rr<0b0010101, 0b001, "fmax.d">, - Sched<[WriteFMinMax64, ReadFMinMax64, ReadFMinMax64]>; - -def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, FPR32, FPR64, "fcvt.s.d">, - Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]> { - let rs2 = 0b00001; +let SchedRW = [WriteFSGNJ64, ReadFSGNJ64, ReadFSGNJ64], + mayRaiseFPException = 0 in { +def FSGNJ_D : FPALU_rr<0b0010001, 0b000, "fsgnj.d", FPR64>; +def FSGNJN_D : FPALU_rr<0b0010001, 0b001, "fsgnjn.d", FPR64>; +def FSGNJX_D : FPALU_rr<0b0010001, 0b010, "fsgnjx.d", FPR64>; } -def : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>; -def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR32, "fcvt.d.s">, - Sched<[WriteFCvtF32ToF64, ReadFCvtF32ToF64]> { - let rs2 = 0b00000; +let SchedRW = [WriteFMinMax64, ReadFMinMax64, ReadFMinMax64] in { +def FMIN_D : FPALU_rr<0b0010101, 0b000, "fmin.d", FPR64>; +def FMAX_D : FPALU_rr<0b0010101, 0b001, "fmax.d", FPR64>; } -def FEQ_D : FPCmpD_rr<0b010, "feq.d">; -def FLT_D : FPCmpD_rr<0b001, "flt.d">; -def FLE_D : FPCmpD_rr<0b000, "fle.d">; +def FCVT_S_D : FPUnaryOp_r_frm<0b0100000, 0b00001, FPR32, FPR64, "fcvt.s.d">, + Sched<[WriteFCvtF64ToF32, ReadFCvtF64ToF32]>; +def : FPUnaryOpDynFrmAlias<FCVT_S_D, "fcvt.s.d", FPR32, FPR64>; -def FCLASS_D : FPUnaryOp_r<0b1110001, 0b001, GPR, FPR64, "fclass.d">, - Sched<[WriteFClass64, ReadFClass64]> { - let rs2 = 0b00000; -} +def FCVT_D_S : FPUnaryOp_r<0b0100001, 0b00000, 0b000, FPR64, FPR32, "fcvt.d.s">, + Sched<[WriteFCvtF32ToF64, 
ReadFCvtF32ToF64]>; -def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.w.d">, - Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> { - let rs2 = 0b00000; +let SchedRW = [WriteFCmp64, ReadFCmp64, ReadFCmp64] in { +def FEQ_D : FPCmp_rr<0b1010001, 0b010, "feq.d", FPR64>; +def FLT_D : FPCmp_rr<0b1010001, 0b001, "flt.d", FPR64>; +def FLE_D : FPCmp_rr<0b1010001, 0b000, "fle.d", FPR64>; } + +let mayRaiseFPException = 0 in +def FCLASS_D : FPUnaryOp_r<0b1110001, 0b00000, 0b001, GPR, FPR64, "fclass.d">, + Sched<[WriteFClass64, ReadFClass64]>; + +def FCVT_W_D : FPUnaryOp_r_frm<0b1100001, 0b00000, GPR, FPR64, "fcvt.w.d">, + Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; def : FPUnaryOpDynFrmAlias<FCVT_W_D, "fcvt.w.d", GPR, FPR64>; -def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.wu.d">, - Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]> { - let rs2 = 0b00001; -} +def FCVT_WU_D : FPUnaryOp_r_frm<0b1100001, 0b00001, GPR, FPR64, "fcvt.wu.d">, + Sched<[WriteFCvtF64ToI32, ReadFCvtF64ToI32]>; def : FPUnaryOpDynFrmAlias<FCVT_WU_D, "fcvt.wu.d", GPR, FPR64>; -def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.w">, - Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> { - let rs2 = 0b00000; -} +def FCVT_D_W : FPUnaryOp_r<0b1101001, 0b00000, 0b000, FPR64, GPR, "fcvt.d.w">, + Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; -def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b000, FPR64, GPR, "fcvt.d.wu">, - Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]> { - let rs2 = 0b00001; -} +def FCVT_D_WU : FPUnaryOp_r<0b1101001, 0b00001, 0b000, FPR64, GPR, "fcvt.d.wu">, + Sched<[WriteFCvtI32ToF64, ReadFCvtI32ToF64]>; } // Predicates = [HasStdExtD] let Predicates = [HasStdExtD, IsRV64] in { -def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.l.d">, - Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> { - let rs2 = 0b00010; -} +def FCVT_L_D : FPUnaryOp_r_frm<0b1100001, 0b00010, GPR, FPR64, "fcvt.l.d">, + Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>; def : FPUnaryOpDynFrmAlias<FCVT_L_D, "fcvt.l.d", GPR, FPR64>; -def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, GPR, FPR64, "fcvt.lu.d">, - Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]> { - let rs2 = 0b00011; -} +def FCVT_LU_D : FPUnaryOp_r_frm<0b1100001, 0b00011, GPR, FPR64, "fcvt.lu.d">, + Sched<[WriteFCvtF64ToI64, ReadFCvtF64ToI64]>; def : FPUnaryOpDynFrmAlias<FCVT_LU_D, "fcvt.lu.d", GPR, FPR64>; -def FMV_X_D : FPUnaryOp_r<0b1110001, 0b000, GPR, FPR64, "fmv.x.d">, - Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]> { - let rs2 = 0b00000; -} +let mayRaiseFPException = 0 in +def FMV_X_D : FPUnaryOp_r<0b1110001, 0b00000, 0b000, GPR, FPR64, "fmv.x.d">, + Sched<[WriteFMovF64ToI64, ReadFMovF64ToI64]>; -def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.l">, - Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> { - let rs2 = 0b00010; -} +def FCVT_D_L : FPUnaryOp_r_frm<0b1101001, 0b00010, FPR64, GPR, "fcvt.d.l">, + Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>; def : FPUnaryOpDynFrmAlias<FCVT_D_L, "fcvt.d.l", FPR64, GPR>; -def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, FPR64, GPR, "fcvt.d.lu">, - Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]> { - let rs2 = 0b00011; -} +def FCVT_D_LU : FPUnaryOp_r_frm<0b1101001, 0b00011, FPR64, GPR, "fcvt.d.lu">, + Sched<[WriteFCvtI64ToF64, ReadFCvtI64ToF64]>; def : FPUnaryOpDynFrmAlias<FCVT_D_LU, "fcvt.d.lu", FPR64, GPR>; -def FMV_D_X : FPUnaryOp_r<0b1111001, 0b000, FPR64, GPR, "fmv.d.x">, - Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]> { - let rs2 = 0b00000; -} +let mayRaiseFPException = 0 in +def FMV_D_X : FPUnaryOp_r<0b1111001, 0b00000, 0b000, FPR64, GPR, 
"fmv.d.x">, + Sched<[WriteFMovI64ToF64, ReadFMovI64ToF64]>; } // Predicates = [HasStdExtD, IsRV64] //===----------------------------------------------------------------------===// @@ -241,20 +184,20 @@ let Predicates = [HasStdExtD] in { /// Float conversion operations // f64 -> f32, f32 -> f64 -def : Pat<(fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>; -def : Pat<(fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; +def : Pat<(any_fpround FPR64:$rs1), (FCVT_S_D FPR64:$rs1, 0b111)>; +def : Pat<(any_fpextend FPR32:$rs1), (FCVT_D_S FPR32:$rs1)>; // [u]int<->double conversion patterns must be gated on IsRV32 or IsRV64, so // are defined later. /// Float arithmetic operations -def : PatFpr64Fpr64DynFrm<fadd, FADD_D>; -def : PatFpr64Fpr64DynFrm<fsub, FSUB_D>; -def : PatFpr64Fpr64DynFrm<fmul, FMUL_D>; -def : PatFpr64Fpr64DynFrm<fdiv, FDIV_D>; +def : PatFpr64Fpr64DynFrm<any_fadd, FADD_D>; +def : PatFpr64Fpr64DynFrm<any_fsub, FSUB_D>; +def : PatFpr64Fpr64DynFrm<any_fmul, FMUL_D>; +def : PatFpr64Fpr64DynFrm<any_fdiv, FDIV_D>; -def : Pat<(fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, 0b111)>; +def : Pat<(any_fsqrt FPR64:$rs1), (FSQRT_D FPR64:$rs1, 0b111)>; def : Pat<(fneg FPR64:$rs1), (FSGNJN_D $rs1, $rs1)>; def : Pat<(fabs FPR64:$rs1), (FSGNJX_D $rs1, $rs1)>; @@ -266,19 +209,19 @@ def : Pat<(fcopysign FPR32:$rs1, FPR64:$rs2), (FSGNJ_S $rs1, (FCVT_S_D $rs2, 0b111))>; // fmadd: rs1 * rs2 + rs3 -def : Pat<(fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3), +def : Pat<(any_fma FPR64:$rs1, FPR64:$rs2, FPR64:$rs3), (FMADD_D $rs1, $rs2, $rs3, 0b111)>; // fmsub: rs1 * rs2 - rs3 -def : Pat<(fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)), +def : Pat<(any_fma FPR64:$rs1, FPR64:$rs2, (fneg FPR64:$rs3)), (FMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; // fnmsub: -rs1 * rs2 + rs3 -def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3), +def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, FPR64:$rs3), (FNMSUB_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; // fnmadd: -rs1 * rs2 - rs3 -def : Pat<(fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)), +def : Pat<(any_fma (fneg FPR64:$rs1), FPR64:$rs2, (fneg FPR64:$rs3)), (FNMADD_D FPR64:$rs1, FPR64:$rs2, FPR64:$rs3, 0b111)>; // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches @@ -328,8 +271,8 @@ let Predicates = [HasStdExtD, IsRV32] in { def : Pat<(f64 (fpimm0)), (FCVT_D_W (i32 X0))>; // double->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR64:$rs1)), (FCVT_W_D FPR64:$rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR64:$rs1)), (FCVT_WU_D FPR64:$rs1, 0b001)>; // Saturating double->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_W_D $rs1, 0b001)>; @@ -342,8 +285,8 @@ def : Pat<(i32 (lrint FPR64:$rs1)), (FCVT_W_D $rs1, 0b111)>; def : Pat<(i32 (lround FPR64:$rs1)), (FCVT_W_D $rs1, 0b100)>; // [u]int->double. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_D_W GPR:$rs1)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_D_WU GPR:$rs1)>; } // Predicates = [HasStdExtD, IsRV32] let Predicates = [HasStdExtD, IsRV64] in { @@ -358,20 +301,20 @@ def : Pat<(i64 (bitconvert FPR64:$rs1)), (FMV_X_D FPR64:$rs1)>; // Use target specific isd nodes to help us remember the result is sign // extended. 
Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR64:$rs1), (FCVT_W_D $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR64:$rs1), (FCVT_WU_D $rs1, 0b001)>; // [u]int32->fp -def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_D_W $rs1)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_D_WU $rs1)>; // Saturating double->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR64:$rs1)), (FCVT_L_D $rs1, 0b001)>; def : Pat<(i64 (riscv_fcvt_xu_rtz FPR64:$rs1)), (FCVT_LU_D $rs1, 0b001)>; // double->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR64:$rs1)), (FCVT_L_D FPR64:$rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR64:$rs1)), (FCVT_LU_D FPR64:$rs1, 0b001)>; // double->int64 with current rounding mode. def : Pat<(i64 (lrint FPR64:$rs1)), (FCVT_L_D $rs1, 0b111)>; @@ -382,6 +325,6 @@ def : Pat<(i64 (lround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; def : Pat<(i64 (llround FPR64:$rs1)), (FCVT_L_D $rs1, 0b100)>; // [u]int64->fp. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_D_L GPR:$rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_D_LU GPR:$rs1, 0b111)>; } // Predicates = [HasStdExtD, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td index 3400c3be52bf..bb45ed859442 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoF.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoF.td @@ -19,9 +19,9 @@ def SDT_RISCVFMV_W_X_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i64>]>; def SDT_RISCVFMV_X_ANYEXTW_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, f32>]>; -def STD_RISCVFCVT_W_RV64 +def SDT_RISCVFCVT_W_RV64 : SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisFP<1>]>; -def STD_RISCVFCVT_X +def SDT_RISCVFCVT_X : SDTypeProfile<1, 1, [SDTCisVT<0, XLenVT>, SDTCisFP<1>]>; def riscv_fmv_w_x_rv64 @@ -29,13 +29,27 @@ def riscv_fmv_w_x_rv64 def riscv_fmv_x_anyextw_rv64 : SDNode<"RISCVISD::FMV_X_ANYEXTW_RV64", SDT_RISCVFMV_X_ANYEXTW_RV64>; def riscv_fcvt_w_rtz_rv64 - : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", STD_RISCVFCVT_W_RV64>; + : SDNode<"RISCVISD::FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64>; def riscv_fcvt_wu_rtz_rv64 - : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", STD_RISCVFCVT_W_RV64>; + : SDNode<"RISCVISD::FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64>; def riscv_fcvt_x_rtz - : SDNode<"RISCVISD::FCVT_X_RTZ", STD_RISCVFCVT_X>; + : SDNode<"RISCVISD::FCVT_X_RTZ", SDT_RISCVFCVT_X>; def riscv_fcvt_xu_rtz - : SDNode<"RISCVISD::FCVT_XU_RTZ", STD_RISCVFCVT_X>; + : SDNode<"RISCVISD::FCVT_XU_RTZ", SDT_RISCVFCVT_X>; + +def riscv_strict_fcvt_w_rtz_rv64 + : SDNode<"RISCVISD::STRICT_FCVT_W_RTZ_RV64", SDT_RISCVFCVT_W_RV64, + [SDNPHasChain]>; +def riscv_strict_fcvt_wu_rtz_rv64 + : SDNode<"RISCVISD::STRICT_FCVT_WU_RTZ_RV64", SDT_RISCVFCVT_W_RV64, + [SDNPHasChain]>; + 
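The `SDNPHasChain` flag on `riscv_strict_fcvt_w_rtz_rv64` and `riscv_strict_fcvt_wu_rtz_rv64` is what keeps a constrained conversion ordered with other code that reads or writes the FP environment, and the `riscv_any_fcvt_*` PatFrags that follow let a single instruction pattern match either the strict or the ordinary node. A minimal sketch of the source-level behavior the strict variants preserve, assuming strict FP semantics are in effect (`checked_convert` is a hypothetical example, not from this commit):

```cpp
#include <cfenv>
#include <cstdint>
#pragma STDC FENV_ACCESS ON // request strict FP semantics (compiler support varies)

// With strict semantics, the conversion may not be hoisted above or CSE'd
// across feclearexcept/fetestexcept: fcvt.w.d sets FP exception flags
// (invalid for NaN or out-of-range inputs), so its ordering is observable.
int32_t checked_convert(double d) {
  std::feclearexcept(FE_ALL_EXCEPT);
  // Lowered to fcvt.w.d with the static rtz rounding operand (0b001):
  // C++ truncates toward zero regardless of the dynamic rounding mode,
  // which is why the fp_to_sint patterns hard-code 0b001 rather than
  // the dynamic-rounding encoding 0b111.
  int32_t r = static_cast<int32_t>(d);
  if (std::fetestexcept(FE_INVALID))
    r = 0; // input was NaN or out of range for int32_t
  return r;
}
```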
+def riscv_any_fcvt_w_rtz_rv64 : PatFrags<(ops node:$src), + [(riscv_strict_fcvt_w_rtz_rv64 node:$src), + (riscv_fcvt_w_rtz_rv64 node:$src)]>; +def riscv_any_fcvt_wu_rtz_rv64 : PatFrags<(ops node:$src), + [(riscv_strict_fcvt_wu_rtz_rv64 node:$src), + (riscv_fcvt_wu_rtz_rv64 node:$src)]>; //===----------------------------------------------------------------------===// // Operand and SDNode transformation definitions. @@ -59,54 +73,65 @@ def frmarg : Operand<XLenVT> { // Instruction class templates //===----------------------------------------------------------------------===// -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPFMAS_rrr_frm<RISCVOpcode opcode, string opcodestr> - : RVInstR4Frm<0b00, opcode, (outs FPR32:$rd), - (ins FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, frmarg:$funct3), - opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">; +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, + UseNamedOperandTable = 1, hasPostISelHook = 1 in +class FPFMA_rrr_frm<RISCVOpcode opcode, bits<2> funct2, string opcodestr, + RegisterClass rty> + : RVInstR4Frm<funct2, opcode, (outs rty:$rd), + (ins rty:$rs1, rty:$rs2, rty:$rs3, frmarg:$frm), + opcodestr, "$rd, $rs1, $rs2, $rs3, $frm">; -class FPFMASDynFrmAlias<FPFMAS_rrr_frm Inst, string OpcodeStr> +class FPFMADynFrmAlias<FPFMA_rrr_frm Inst, string OpcodeStr, + RegisterClass rty> : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3", - (Inst FPR32:$rd, FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; - -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPALUS_rr<bits<7> funct7, bits<3> funct3, string opcodestr> - : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR32:$rd), - (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">; - -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPALUS_rr_frm<bits<7> funct7, string opcodestr> - : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR32:$rd), - (ins FPR32:$rs1, FPR32:$rs2, frmarg:$funct3), opcodestr, - "$rd, $rs1, $rs2, $funct3">; - -class FPALUSDynFrmAlias<FPALUS_rr_frm Inst, string OpcodeStr> + (Inst rty:$rd, rty:$rs1, rty:$rs2, rty:$rs3, 0b111)>; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in +class FPALU_rr<bits<7> funct7, bits<3> funct3, string opcodestr, + RegisterClass rty> + : RVInstR<funct7, funct3, OPC_OP_FP, (outs rty:$rd), + (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">; + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, + UseNamedOperandTable = 1, hasPostISelHook = 1 in +class FPALU_rr_frm<bits<7> funct7, string opcodestr, RegisterClass rty> + : RVInstRFrm<funct7, OPC_OP_FP, (outs rty:$rd), + (ins rty:$rs1, rty:$rs2, frmarg:$frm), opcodestr, + "$rd, $rs1, $rs2, $frm">; + +class FPALUDynFrmAlias<FPALU_rr_frm Inst, string OpcodeStr, + RegisterClass rty> : InstAlias<OpcodeStr#" $rd, $rs1, $rs2", - (Inst FPR32:$rd, FPR32:$rs1, FPR32:$rs2, 0b111)>; + (Inst rty:$rd, rty:$rs1, rty:$rs2, 0b111)>; -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPUnaryOp_r<bits<7> funct7, bits<3> funct3, RegisterClass rdty, - RegisterClass rs1ty, string opcodestr> +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in +class FPUnaryOp_r<bits<7> funct7, bits<5> rs2val, bits<3> funct3, + RegisterClass rdty, RegisterClass rs1ty, string opcodestr> : RVInstR<funct7, funct3, OPC_OP_FP, (outs rdty:$rd), (ins rs1ty:$rs1), - opcodestr, "$rd, $rs1">; + opcodestr, "$rd, $rs1"> { + let rs2 = rs2val; +} -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPUnaryOp_r_frm<bits<7> funct7, 
RegisterClass rdty, RegisterClass rs1ty, - string opcodestr> +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1, + UseNamedOperandTable = 1, hasPostISelHook = 1 in +class FPUnaryOp_r_frm<bits<7> funct7, bits<5> rs2val, RegisterClass rdty, + RegisterClass rs1ty, string opcodestr> : RVInstRFrm<funct7, OPC_OP_FP, (outs rdty:$rd), - (ins rs1ty:$rs1, frmarg:$funct3), opcodestr, - "$rd, $rs1, $funct3">; + (ins rs1ty:$rs1, frmarg:$frm), opcodestr, + "$rd, $rs1, $frm"> { + let rs2 = rs2val; +} class FPUnaryOpDynFrmAlias<FPUnaryOp_r_frm Inst, string OpcodeStr, RegisterClass rdty, RegisterClass rs1ty> : InstAlias<OpcodeStr#" $rd, $rs1", (Inst rdty:$rd, rs1ty:$rs1, 0b111)>; -let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in -class FPCmpS_rr<bits<3> funct3, string opcodestr> - : RVInstR<0b1010000, funct3, OPC_OP_FP, (outs GPR:$rd), - (ins FPR32:$rs1, FPR32:$rs2), opcodestr, "$rd, $rs1, $rs2">, - Sched<[WriteFCmp32, ReadFCmp32, ReadFCmp32]>; +let hasSideEffects = 0, mayLoad = 0, mayStore = 0, mayRaiseFPException = 1 in +class FPCmp_rr<bits<7> funct7, bits<3> funct3, string opcodestr, + RegisterClass rty> + : RVInstR<funct7, funct3, OPC_OP_FP, (outs GPR:$rd), + (ins rty:$rs1, rty:$rs2), opcodestr, "$rd, $rs1, $rs2">; //===----------------------------------------------------------------------===// // Instructions @@ -128,116 +153,98 @@ def FSW : RVInstS<0b010, OPC_STORE_FP, (outs), "fsw", "$rs2, ${imm12}(${rs1})">, Sched<[WriteFST32, ReadStoreData, ReadFMemBase]>; -def FMADD_S : FPFMAS_rrr_frm<OPC_MADD, "fmadd.s">, - Sched<[WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32]>; -def : FPFMASDynFrmAlias<FMADD_S, "fmadd.s">; -def FMSUB_S : FPFMAS_rrr_frm<OPC_MSUB, "fmsub.s">, - Sched<[WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32]>; -def : FPFMASDynFrmAlias<FMSUB_S, "fmsub.s">; -def FNMSUB_S : FPFMAS_rrr_frm<OPC_NMSUB, "fnmsub.s">, - Sched<[WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32]>; -def : FPFMASDynFrmAlias<FNMSUB_S, "fnmsub.s">; -def FNMADD_S : FPFMAS_rrr_frm<OPC_NMADD, "fnmadd.s">, - Sched<[WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32]>; -def : FPFMASDynFrmAlias<FNMADD_S, "fnmadd.s">; - -def FADD_S : FPALUS_rr_frm<0b0000000, "fadd.s">, +let SchedRW = [WriteFMA32, ReadFMA32, ReadFMA32, ReadFMA32] in { +def FMADD_S : FPFMA_rrr_frm<OPC_MADD, 0b00, "fmadd.s", FPR32>; +def FMSUB_S : FPFMA_rrr_frm<OPC_MSUB, 0b00, "fmsub.s", FPR32>; +def FNMSUB_S : FPFMA_rrr_frm<OPC_NMSUB, 0b00, "fnmsub.s", FPR32>; +def FNMADD_S : FPFMA_rrr_frm<OPC_NMADD, 0b00, "fnmadd.s", FPR32>; +} + +def : FPFMADynFrmAlias<FMADD_S, "fmadd.s", FPR32>; +def : FPFMADynFrmAlias<FMSUB_S, "fmsub.s", FPR32>; +def : FPFMADynFrmAlias<FNMSUB_S, "fnmsub.s", FPR32>; +def : FPFMADynFrmAlias<FNMADD_S, "fnmadd.s", FPR32>; + +def FADD_S : FPALU_rr_frm<0b0000000, "fadd.s", FPR32>, Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; -def : FPALUSDynFrmAlias<FADD_S, "fadd.s">; -def FSUB_S : FPALUS_rr_frm<0b0000100, "fsub.s">, +def FSUB_S : FPALU_rr_frm<0b0000100, "fsub.s", FPR32>, Sched<[WriteFALU32, ReadFALU32, ReadFALU32]>; -def : FPALUSDynFrmAlias<FSUB_S, "fsub.s">; -def FMUL_S : FPALUS_rr_frm<0b0001000, "fmul.s">, +def FMUL_S : FPALU_rr_frm<0b0001000, "fmul.s", FPR32>, Sched<[WriteFMul32, ReadFMul32, ReadFMul32]>; -def : FPALUSDynFrmAlias<FMUL_S, "fmul.s">; -def FDIV_S : FPALUS_rr_frm<0b0001100, "fdiv.s">, +def FDIV_S : FPALU_rr_frm<0b0001100, "fdiv.s", FPR32>, Sched<[WriteFDiv32, ReadFDiv32, ReadFDiv32]>; -def : FPALUSDynFrmAlias<FDIV_S, "fdiv.s">; -def FSQRT_S : FPUnaryOp_r_frm<0b0101100, FPR32, FPR32, "fsqrt.s">, - Sched<[WriteFSqrt32, 
ReadFSqrt32]> { - let rs2 = 0b00000; -} +def : FPALUDynFrmAlias<FADD_S, "fadd.s", FPR32>; +def : FPALUDynFrmAlias<FSUB_S, "fsub.s", FPR32>; +def : FPALUDynFrmAlias<FMUL_S, "fmul.s", FPR32>; +def : FPALUDynFrmAlias<FDIV_S, "fdiv.s", FPR32>; + +def FSQRT_S : FPUnaryOp_r_frm<0b0101100, 0b00000, FPR32, FPR32, "fsqrt.s">, + Sched<[WriteFSqrt32, ReadFSqrt32]>; def : FPUnaryOpDynFrmAlias<FSQRT_S, "fsqrt.s", FPR32, FPR32>; -def FSGNJ_S : FPALUS_rr<0b0010000, 0b000, "fsgnj.s">, - Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>; -def FSGNJN_S : FPALUS_rr<0b0010000, 0b001, "fsgnjn.s">, - Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>; -def FSGNJX_S : FPALUS_rr<0b0010000, 0b010, "fsgnjx.s">, - Sched<[WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32]>; -def FMIN_S : FPALUS_rr<0b0010100, 0b000, "fmin.s">, - Sched<[WriteFMinMax32, ReadFMinMax32, ReadFMinMax32]>; -def FMAX_S : FPALUS_rr<0b0010100, 0b001, "fmax.s">, - Sched<[WriteFMinMax32, ReadFMinMax32, ReadFMinMax32]>; - -def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.w.s">, - Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> { - let rs2 = 0b00000; +let SchedRW = [WriteFSGNJ32, ReadFSGNJ32, ReadFSGNJ32], + mayRaiseFPException = 0 in { +def FSGNJ_S : FPALU_rr<0b0010000, 0b000, "fsgnj.s", FPR32>; +def FSGNJN_S : FPALU_rr<0b0010000, 0b001, "fsgnjn.s", FPR32>; +def FSGNJX_S : FPALU_rr<0b0010000, 0b010, "fsgnjx.s", FPR32>; } -def : FPUnaryOpDynFrmAlias<FCVT_W_S, "fcvt.w.s", GPR, FPR32>; -def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.wu.s">, - Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]> { - let rs2 = 0b00001; +let SchedRW = [WriteFMinMax32, ReadFMinMax32, ReadFMinMax32] in { +def FMIN_S : FPALU_rr<0b0010100, 0b000, "fmin.s", FPR32>; +def FMAX_S : FPALU_rr<0b0010100, 0b001, "fmax.s", FPR32>; } -def : FPUnaryOpDynFrmAlias<FCVT_WU_S, "fcvt.wu.s", GPR, FPR32>; -def FMV_X_W : FPUnaryOp_r<0b1110000, 0b000, GPR, FPR32, "fmv.x.w">, - Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]> { - let rs2 = 0b00000; -} +def FCVT_W_S : FPUnaryOp_r_frm<0b1100000, 0b00000, GPR, FPR32, "fcvt.w.s">, + Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; +def : FPUnaryOpDynFrmAlias<FCVT_W_S, "fcvt.w.s", GPR, FPR32>; + +def FCVT_WU_S : FPUnaryOp_r_frm<0b1100000, 0b00001, GPR, FPR32, "fcvt.wu.s">, + Sched<[WriteFCvtF32ToI32, ReadFCvtF32ToI32]>; +def : FPUnaryOpDynFrmAlias<FCVT_WU_S, "fcvt.wu.s", GPR, FPR32>; -def FEQ_S : FPCmpS_rr<0b010, "feq.s">; -def FLT_S : FPCmpS_rr<0b001, "flt.s">; -def FLE_S : FPCmpS_rr<0b000, "fle.s">; +let mayRaiseFPException = 0 in +def FMV_X_W : FPUnaryOp_r<0b1110000, 0b00000, 0b000, GPR, FPR32, "fmv.x.w">, + Sched<[WriteFMovF32ToI32, ReadFMovF32ToI32]>; -def FCLASS_S : FPUnaryOp_r<0b1110000, 0b001, GPR, FPR32, "fclass.s">, - Sched<[WriteFClass32, ReadFClass32]> { - let rs2 = 0b00000; +let SchedRW = [WriteFCmp32, ReadFCmp32, ReadFCmp32] in { +def FEQ_S : FPCmp_rr<0b1010000, 0b010, "feq.s", FPR32>; +def FLT_S : FPCmp_rr<0b1010000, 0b001, "flt.s", FPR32>; +def FLE_S : FPCmp_rr<0b1010000, 0b000, "fle.s", FPR32>; } -def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.w">, - Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> { - let rs2 = 0b00000; -} +let mayRaiseFPException = 0 in +def FCLASS_S : FPUnaryOp_r<0b1110000, 0b00000, 0b001, GPR, FPR32, "fclass.s">, + Sched<[WriteFClass32, ReadFClass32]>; + +def FCVT_S_W : FPUnaryOp_r_frm<0b1101000, 0b00000, FPR32, GPR, "fcvt.s.w">, + Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; def : FPUnaryOpDynFrmAlias<FCVT_S_W, "fcvt.s.w", FPR32, GPR>; -def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.wu">, - 
Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]> { - let rs2 = 0b00001; -} +def FCVT_S_WU : FPUnaryOp_r_frm<0b1101000, 0b00001, FPR32, GPR, "fcvt.s.wu">, + Sched<[WriteFCvtI32ToF32, ReadFCvtI32ToF32]>; def : FPUnaryOpDynFrmAlias<FCVT_S_WU, "fcvt.s.wu", FPR32, GPR>; -def FMV_W_X : FPUnaryOp_r<0b1111000, 0b000, FPR32, GPR, "fmv.w.x">, - Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]> { - let rs2 = 0b00000; -} +let mayRaiseFPException = 0 in +def FMV_W_X : FPUnaryOp_r<0b1111000, 0b00000, 0b000, FPR32, GPR, "fmv.w.x">, + Sched<[WriteFMovI32ToF32, ReadFMovI32ToF32]>; } // Predicates = [HasStdExtF] let Predicates = [HasStdExtF, IsRV64] in { -def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.l.s">, - Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> { - let rs2 = 0b00010; -} +def FCVT_L_S : FPUnaryOp_r_frm<0b1100000, 0b00010, GPR, FPR32, "fcvt.l.s">, + Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>; def : FPUnaryOpDynFrmAlias<FCVT_L_S, "fcvt.l.s", GPR, FPR32>; -def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, GPR, FPR32, "fcvt.lu.s">, - Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]> { - let rs2 = 0b00011; -} +def FCVT_LU_S : FPUnaryOp_r_frm<0b1100000, 0b00011, GPR, FPR32, "fcvt.lu.s">, + Sched<[WriteFCvtF32ToI64, ReadFCvtF32ToI64]>; def : FPUnaryOpDynFrmAlias<FCVT_LU_S, "fcvt.lu.s", GPR, FPR32>; -def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.l">, - Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> { - let rs2 = 0b00010; -} +def FCVT_S_L : FPUnaryOp_r_frm<0b1101000, 0b00010, FPR32, GPR, "fcvt.s.l">, + Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>; def : FPUnaryOpDynFrmAlias<FCVT_S_L, "fcvt.s.l", FPR32, GPR>; -def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, FPR32, GPR, "fcvt.s.lu">, - Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]> { - let rs2 = 0b00011; -} +def FCVT_S_LU : FPUnaryOp_r_frm<0b1101000, 0b00011, FPR32, GPR, "fcvt.s.lu">, + Sched<[WriteFCvtI64ToF32, ReadFCvtI64ToF32]>; def : FPUnaryOpDynFrmAlias<FCVT_S_LU, "fcvt.s.lu", FPR32, GPR>; } // Predicates = [HasStdExtF, IsRV64] @@ -320,12 +327,12 @@ def : Pat<(f32 (fpimm0)), (FMV_W_X X0)>; /// Float arithmetic operations -def : PatFpr32Fpr32DynFrm<fadd, FADD_S>; -def : PatFpr32Fpr32DynFrm<fsub, FSUB_S>; -def : PatFpr32Fpr32DynFrm<fmul, FMUL_S>; -def : PatFpr32Fpr32DynFrm<fdiv, FDIV_S>; +def : PatFpr32Fpr32DynFrm<any_fadd, FADD_S>; +def : PatFpr32Fpr32DynFrm<any_fsub, FSUB_S>; +def : PatFpr32Fpr32DynFrm<any_fmul, FMUL_S>; +def : PatFpr32Fpr32DynFrm<any_fdiv, FDIV_S>; -def : Pat<(fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, 0b111)>; +def : Pat<(any_fsqrt FPR32:$rs1), (FSQRT_S FPR32:$rs1, 0b111)>; def : Pat<(fneg FPR32:$rs1), (FSGNJN_S $rs1, $rs1)>; def : Pat<(fabs FPR32:$rs1), (FSGNJX_S $rs1, $rs1)>; @@ -334,19 +341,19 @@ def : PatFpr32Fpr32<fcopysign, FSGNJ_S>; def : Pat<(fcopysign FPR32:$rs1, (fneg FPR32:$rs2)), (FSGNJN_S $rs1, $rs2)>; // fmadd: rs1 * rs2 + rs3 -def : Pat<(fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3), +def : Pat<(any_fma FPR32:$rs1, FPR32:$rs2, FPR32:$rs3), (FMADD_S $rs1, $rs2, $rs3, 0b111)>; // fmsub: rs1 * rs2 - rs3 -def : Pat<(fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)), +def : Pat<(any_fma FPR32:$rs1, FPR32:$rs2, (fneg FPR32:$rs3)), (FMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; // fnmsub: -rs1 * rs2 + rs3 -def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3), +def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, FPR32:$rs3), (FNMSUB_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; // fnmadd: -rs1 * rs2 - rs3 -def : Pat<(fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg FPR32:$rs3)), +def : Pat<(any_fma (fneg FPR32:$rs1), FPR32:$rs2, (fneg 
FPR32:$rs3)), (FNMADD_S FPR32:$rs1, FPR32:$rs2, FPR32:$rs3, 0b111)>; // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches @@ -382,8 +389,8 @@ def : Pat<(bitconvert (i32 GPR:$rs1)), (FMV_W_X GPR:$rs1)>; def : Pat<(i32 (bitconvert FPR32:$rs1)), (FMV_X_W FPR32:$rs1)>; // float->[u]int. Round-to-zero must be used. -def : Pat<(i32 (fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(i32 (fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_sint FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(i32 (any_fp_to_uint FPR32:$rs1)), (FCVT_WU_S $rs1, 0b001)>; // Saturating float->[u]int32. def : Pat<(i32 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_W_S $rs1, 0b001)>; @@ -396,8 +403,8 @@ def : Pat<(i32 (lrint FPR32:$rs1)), (FCVT_W_S $rs1, 0b111)>; def : Pat<(i32 (lround FPR32:$rs1)), (FCVT_W_S $rs1, 0b100)>; // [u]int->float. Match GCC and default to using dynamic rounding mode. -def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_S_WU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV32] let Predicates = [HasStdExtF, IsRV64] in { @@ -410,12 +417,12 @@ def : Pat<(sext_inreg (riscv_fmv_x_anyextw_rv64 FPR32:$src), i32), // Use target specific isd nodes to help us remember the result is sign // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be // duplicated if it has another user that didn't need the sign_extend. -def : Pat<(riscv_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; -def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR32:$rs1), (FCVT_W_S $rs1, 0b001)>; +def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR32:$rs1), (FCVT_WU_S $rs1, 0b001)>; // float->[u]int64. Round-to-zero must be used. -def : Pat<(i64 (fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; -def : Pat<(i64 (fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_sint FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; +def : Pat<(i64 (any_fp_to_uint FPR32:$rs1)), (FCVT_LU_S $rs1, 0b001)>; // Saturating float->[u]int64. def : Pat<(i64 (riscv_fcvt_x_rtz FPR32:$rs1)), (FCVT_L_S $rs1, 0b001)>; @@ -430,8 +437,8 @@ def : Pat<(i64 (lround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; def : Pat<(i64 (llround FPR32:$rs1)), (FCVT_L_S $rs1, 0b100)>; // [u]int->fp. Match GCC and default to using dynamic rounding mode. 
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>; -def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>; -def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_S_W $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_S_WU $rs1, 0b111)>; +def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_S_L $rs1, 0b111)>; +def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_S_LU $rs1, 0b111)>; } // Predicates = [HasStdExtF, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td index a037dbf585ce..b62e23d3b0fa 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoM.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoM.td @@ -96,14 +96,6 @@ def : Pat<(srem (sexti32 (i64 GPR:$rs1)), (sexti32 (i64 GPR:$rs2))), (REMW GPR:$rs1, GPR:$rs2)>; } // Predicates = [HasStdExtM, IsRV64] -// Pattern to detect constants with no more than 32 active bits that can't -// be materialized with lui+addiw. -def uimm32_not_simm32 : PatLeaf<(XLenVT GPR:$a), [{ - auto *C = dyn_cast<ConstantSDNode>(N); - return C && C->hasOneUse() && isUInt<32>(C->getZExtValue()) && - !isInt<32>(C->getSExtValue()); -}]>; - let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in { // Special case for calculating the full 64-bit product of a 32x32 unsigned // multiply where the inputs aren't known to be zero extended. We can shift the @@ -111,9 +103,4 @@ let Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] in { // zeroing the upper 32 bits. def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), (and GPR:$rs2, 0xffffffff))), (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>; -// The RHS could also be a constant that is hard to materialize. By shifting -// left we can allow constant materialization to use LUI+ADDIW via -// hasAllWUsers. -def : Pat<(i64 (mul (and GPR:$rs1, 0xffffffff), uimm32_not_simm32:$rs2)), - (MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))>; } // Predicates = [HasStdExtM, IsRV64, NotHasStdExtZba] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td index 3d5f9bc54731..173ae43a08d6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td @@ -338,29 +338,6 @@ class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr> opcodestr, "$vd, $vs2$vm">; } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 -let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in { -// vamo vd, (rs1), vs2, vd, vm -class VAMOWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> - : RVInstVAMO<amoop, width.Value{2-0}, (outs VR:$vd_wd), - (ins GPR:$rs1, VR:$vs2, VR:$vd, VMaskOp:$vm), - opcodestr, "$vd_wd, (${rs1}), $vs2, $vd$vm"> { - let Constraints = "$vd_wd = $vd"; - let wd = 1; - bits<5> vd; - let Inst{11-7} = vd; -} - -// vamo x0, (rs1), vs2, vs3, vm -class VAMONoWd<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> - : RVInstVAMO<amoop, width.Value{2-0}, (outs), - (ins GPR:$rs1, VR:$vs2, VR:$vs3, VMaskOp:$vm), - opcodestr, "x0, (${rs1}), $vs2, $vs3$vm"> { - bits<5> vs3; - let Inst{11-7} = vs3; -} - -} // hasSideEffects = 0, mayLoad = 1, mayStore = 1 - //===----------------------------------------------------------------------===// // Combination of instruction classes. // Use these multiclasses to define instructions more easily. 
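For reference, the identity behind the `(MULHU (SLLI GPR:$rs1, 32), (SLLI GPR:$rs2, 32))` pattern retained in the RISCVInstrInfoM.td hunk above: shifting each operand left by 32 zeroes its upper half, so the unsigned 128-bit product is `(rs1 mod 2^32) * (rs2 mod 2^32) * 2^64`, and the high half that MULHU returns is exactly the 64-bit product of the two zero-extended inputs. A small self-check, assuming a GCC/Clang-style `unsigned __int128` and with `mulhu` standing in for the instruction's semantics (an illustrative sketch, not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

// High 64 bits of the unsigned 128-bit product, i.e. what RISC-V MULHU computes.
static uint64_t mulhu(uint64_t a, uint64_t b) {
  return static_cast<uint64_t>((static_cast<unsigned __int128>(a) * b) >> 64);
}

int main() {
  uint64_t rs1 = 0xdeadbeef12345678, rs2 = 0xcafef00d9abcdef0;
  // Reference result: zero-extend the low 32 bits of each input, then multiply.
  uint64_t ref = (rs1 & 0xffffffff) * (rs2 & 0xffffffff);
  // The pattern's trick: SLLI by 32 zeroes the upper halves, and MULHU then
  // yields ((rs1 % 2^32) * (rs2 % 2^32) * 2^64) >> 64 = the full 64-bit product.
  uint64_t got = mulhu(rs1 << 32, rs2 << 32);
  assert(ref == got);
  return 0;
}
```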
@@ -779,11 +756,6 @@ multiclass VCPR_MV_Mask<string opcodestr, bits<6> funct6, string vm = "v"> { Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>; } -multiclass VAMO<RISCVAMOOP amoop, RISCVWidth width, string opcodestr> { - def _WD : VAMOWd<amoop, width, opcodestr>; - def _UNWD : VAMONoWd<amoop, width, opcodestr>; -} - multiclass VWholeLoadN<bits<3> nf, string opcodestr, RegisterClass VRC> { foreach l = [8, 16, 32, 64] in { defvar w = !cast<RISCVWidth>("LSWidth" # l); @@ -822,7 +794,7 @@ foreach eew = [8, 16, 32, 64] in { // Vector Strided Instructions def VLSE#eew#_V : VStridedLoad<w, "vlse"#eew#".v">, VLSSched<eew>; def VSSE#eew#_V : VStridedStore<w, "vsse"#eew#".v">, VSSSched<eew>; - + // Vector Indexed Instructions def VLUXEI#eew#_V : VIndexedLoad<MOPLDIndexedUnord, w, "vluxei"#eew#".v">, VLXSched<eew, "U">; @@ -1416,13 +1388,20 @@ defm VCOMPRESS_V : VCPR_MV_Mask<"vcompress", 0b010111>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0, RVVConstraint = NoConstraint in { -foreach n = [1, 2, 4, 8] in { - def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs VR:$vd), - (ins VR:$vs2), "vmv" # n # "r.v", "$vd, $vs2">, - VMVRSched<n> { +def VMV1R_V : RVInstV<0b100111, 0, OPIVI, (outs VR:$vd), (ins VR:$vs2), + "vmv1r.v", "$vd, $vs2">, VMVRSched<1> { let Uses = []; let vm = 1; } +// A future extension may relax the vector register alignment restrictions. +foreach n = [2, 4, 8] in { + defvar vrc = !cast<VReg>("VRM"#n); + def VMV#n#R_V : RVInstV<0b100111, !add(n, -1), OPIVI, (outs vrc:$vd), + (ins vrc:$vs2), "vmv" # n # "r.v", "$vd, $vs2">, + VMVRSched<n> { + let Uses = []; + let vm = 1; + } } } // hasSideEffects = 0, mayLoad = 0, mayStore = 0 } // Predicates = [HasStdExtV] @@ -1462,31 +1441,4 @@ let Predicates = [HasStdExtZvlsseg] in { } } // Predicates = [HasStdExtZvlsseg] -let Predicates = [HasStdExtZvamo, HasStdExtA] in { - foreach eew = [8, 16, 32] in { - defvar w = !cast<RISCVWidth>("LSWidth"#eew); - defm VAMOSWAPEI#eew : VAMO<AMOOPVamoSwap, w, "vamoswapei"#eew#".v">; - defm VAMOADDEI#eew : VAMO<AMOOPVamoAdd, w, "vamoaddei"#eew#".v">; - defm VAMOXOREI#eew : VAMO<AMOOPVamoXor, w, "vamoxorei"#eew#".v">; - defm VAMOANDEI#eew : VAMO<AMOOPVamoAnd, w, "vamoandei"#eew#".v">; - defm VAMOOREI#eew : VAMO<AMOOPVamoOr, w, "vamoorei"#eew#".v">; - defm VAMOMINEI#eew : VAMO<AMOOPVamoMin, w, "vamominei"#eew#".v">; - defm VAMOMAXEI#eew : VAMO<AMOOPVamoMax, w, "vamomaxei"#eew#".v">; - defm VAMOMINUEI#eew : VAMO<AMOOPVamoMinu, w, "vamominuei"#eew#".v">; - defm VAMOMAXUEI#eew : VAMO<AMOOPVamoMaxu, w, "vamomaxuei"#eew#".v">; - } -} // Predicates = [HasStdExtZvamo, HasStdExtA] - -let Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64] in { - defm VAMOSWAPEI64 : VAMO<AMOOPVamoSwap, LSWidth64, "vamoswapei64.v">; - defm VAMOADDEI64 : VAMO<AMOOPVamoAdd, LSWidth64, "vamoaddei64.v">; - defm VAMOXOREI64 : VAMO<AMOOPVamoXor, LSWidth64, "vamoxorei64.v">; - defm VAMOANDEI64 : VAMO<AMOOPVamoAnd, LSWidth64, "vamoandei64.v">; - defm VAMOOREI64 : VAMO<AMOOPVamoOr, LSWidth64, "vamoorei64.v">; - defm VAMOMINEI64 : VAMO<AMOOPVamoMin, LSWidth64, "vamominei64.v">; - defm VAMOMAXEI64 : VAMO<AMOOPVamoMax, LSWidth64, "vamomaxei64.v">; - defm VAMOMINUEI64 : VAMO<AMOOPVamoMinu, LSWidth64, "vamominuei64.v">; - defm VAMOMAXUEI64 : VAMO<AMOOPVamoMaxu, LSWidth64, "vamomaxuei64.v">; -} // Predicates = [HasStdExtZvamo, HasStdExtA, IsRV64] - include "RISCVInstrInfoVPseudos.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index a82e333e6bab..073fa605e0fb 100644 --- 
a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1124,68 +1124,6 @@ class VPseudoTernaryNoMaskWithPolicy<VReg RetClass, let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); } -class VPseudoAMOWDNoMask<VReg RetClass, - VReg Op1Class> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd), - (ins GPR:$rs1, - Op1Class:$vs2, - GetVRegNoV0<RetClass>.R:$vd, - AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { - let mayLoad = 1; - let mayStore = 1; - let hasSideEffects = 1; - let Constraints = "$vd_wd = $vd"; - let HasVLOp = 1; - let HasSEWOp = 1; - let HasDummyMask = 1; - let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); -} - -class VPseudoAMOWDMask<VReg RetClass, - VReg Op1Class> : - Pseudo<(outs GetVRegNoV0<RetClass>.R:$vd_wd), - (ins GPR:$rs1, - Op1Class:$vs2, - GetVRegNoV0<RetClass>.R:$vd, - VMaskOp:$vm, AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { - let mayLoad = 1; - let mayStore = 1; - let hasSideEffects = 1; - let Constraints = "$vd_wd = $vd"; - let HasVLOp = 1; - let HasSEWOp = 1; - let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); -} - -multiclass VPseudoAMOEI<int eew> { - // Standard scalar AMO supports 32, 64, and 128 Mem data bits, - // and in the base vector "V" extension, only SEW up to ELEN = max(XLEN, FLEN) - // are required to be supported. - // therefore only [32, 64] is allowed here. - foreach sew = [32, 64] in { - foreach lmul = MxSet<sew>.m in { - defvar octuple_lmul = lmul.octuple; - // Calculate emul = eew * lmul / sew - defvar octuple_emul = !srl(!mul(eew, octuple_lmul), log2<sew>.val); - if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then { - defvar emulMX = octuple_to_str<octuple_emul>.ret; - defvar emul= !cast<LMULInfo>("V_" # emulMX); - let VLMul = lmul.value in { - def "_WD_" # lmul.MX # "_" # emulMX : VPseudoAMOWDNoMask<lmul.vrclass, emul.vrclass>; - def "_WD_" # lmul.MX # "_" # emulMX # "_MASK" : VPseudoAMOWDMask<lmul.vrclass, emul.vrclass>; - } - } - } - } -} - -multiclass VPseudoAMO { - foreach eew = EEWList in - defm "EI" # eew : VPseudoAMOEI<eew>; -} - class VPseudoUSSegLoadNoMask<VReg RetClass, int EEW, bits<4> NF, bit isFF>: Pseudo<(outs RetClass:$rd), (ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>, @@ -1376,17 +1314,35 @@ class VPseudoISegStoreMask<VReg ValClass, VReg IdxClass, int EEW, bits<3> LMUL, let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst); } -multiclass VPseudoUSLoad<bit isFF> { +multiclass VPseudoUSLoad { foreach eew = EEWList in { foreach lmul = MxSet<eew>.m in { defvar LInfo = lmul.MX; defvar vreg = lmul.vrclass; - defvar FFStr = !if(isFF, "FF", ""); let VLMul = lmul.value in { - def "E" # eew # FFStr # "_V_" # LInfo : - VPseudoUSLoadNoMask<vreg, eew, isFF>; - def "E" # eew # FFStr # "_V_" # LInfo # "_MASK" : - VPseudoUSLoadMask<vreg, eew, isFF>; + def "E" # eew # "_V_" # LInfo : + VPseudoUSLoadNoMask<vreg, eew, false>, + VLESched<eew>; + def "E" # eew # "_V_" # LInfo # "_MASK" : + VPseudoUSLoadMask<vreg, eew, false>, + VLESched<eew>; + } + } + } +} + +multiclass VPseudoFFLoad { + foreach eew = EEWList in { + foreach lmul = MxSet<eew>.m in { + defvar LInfo = lmul.MX; + defvar vreg = lmul.vrclass; + let VLMul = lmul.value in { + def "E" # eew # "FF_V_" # LInfo : + VPseudoUSLoadNoMask<vreg, eew, true>, + VLFSched<eew>; + def "E" # eew # "FF_V_" # LInfo # "_MASK" : + VPseudoUSLoadMask<vreg, eew, true>, + VLFSched<eew>; } } } @@ -1406,8 +1362,10 @@ multiclass VPseudoSLoad { defvar LInfo = lmul.MX; defvar vreg = lmul.vrclass; let VLMul = lmul.value 
in { - def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask<vreg, eew>; - def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask<vreg, eew>; + def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask<vreg, eew>, + VLSSched<eew>; + def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask<vreg, eew>, + VLSSched<eew>; } } } @@ -1427,11 +1385,14 @@ multiclass VPseudoILoad<bit Ordered> { defvar Vreg = lmul.vrclass; defvar IdxVreg = idx_lmul.vrclass; defvar HasConstraint = !ne(sew, eew); + defvar Order = !if(Ordered, "O", "U"); let VLMul = lmul.value in { def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo : - VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>; + VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>, + VLXSched<eew, Order>; def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" : - VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>; + VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>, + VLXSched<eew, Order>; } } } @@ -1445,8 +1406,10 @@ multiclass VPseudoUSStore { defvar LInfo = lmul.MX; defvar vreg = lmul.vrclass; let VLMul = lmul.value in { - def "E" # eew # "_V_" # LInfo : VPseudoUSStoreNoMask<vreg, eew>; - def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg, eew>; + def "E" # eew # "_V_" # LInfo : VPseudoUSStoreNoMask<vreg, eew>, + VSESched<eew>; + def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSStoreMask<vreg, eew>, + VSESched<eew>; } } } @@ -1466,8 +1429,10 @@ multiclass VPseudoSStore { defvar LInfo = lmul.MX; defvar vreg = lmul.vrclass; let VLMul = lmul.value in { - def "E" # eew # "_V_" # LInfo : VPseudoSStoreNoMask<vreg, eew>; - def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg, eew>; + def "E" # eew # "_V_" # LInfo : VPseudoSStoreNoMask<vreg, eew>, + VSSSched<eew>; + def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSStoreMask<vreg, eew>, + VSSSched<eew>; } } } @@ -1486,11 +1451,14 @@ multiclass VPseudoIStore<bit Ordered> { defvar idx_lmul = !cast<LMULInfo>("V_" # IdxLInfo); defvar Vreg = lmul.vrclass; defvar IdxVreg = idx_lmul.vrclass; + defvar Order = !if(Ordered, "O", "U"); let VLMul = lmul.value in { def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo : - VPseudoIStoreNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>; + VPseudoIStoreNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>, + VSXSched<eew, Order>; def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" : - VPseudoIStoreMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>; + VPseudoIStoreMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered>, + VSXSched<eew, Order>; } } } @@ -1498,32 +1466,50 @@ multiclass VPseudoIStore<bit Ordered> { } } -multiclass VPseudoUnaryS_M { +multiclass VPseudoVPOP_M { foreach mti = AllMasks in { let VLMul = mti.LMul.value in { - def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>; - def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask; + def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>, + Sched<[WriteVMPopV, ReadVMPopV, ReadVMPopV]>; + def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask, + Sched<[WriteVMPopV, ReadVMPopV, ReadVMPopV]>; } } } -multiclass VPseudoUnaryM_M { +multiclass VPseudoV1ST_M { + foreach mti = AllMasks in + { + let VLMul = mti.LMul.value in { + def "_M_" # mti.BX : VPseudoUnaryNoMask<GPR, VR>, + Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMFFSV]>; + def "_M_" # mti.BX # "_MASK" : VPseudoMaskUnarySOutMask, + Sched<[WriteVMFFSV, ReadVMFFSV, ReadVMFFSV]>; + } + } +} + +multiclass VPseudoVSFS_M { defvar constraint = "@earlyclobber $rd"; 
foreach mti = AllMasks in { let VLMul = mti.LMul.value in { - def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>; - def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>; + def "_M_" # mti.BX : VPseudoUnaryNoMask<VR, VR, constraint>, + Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>; + def "_M_" # mti.BX # "_MASK" : VPseudoUnaryMask<VR, VR, constraint>, + Sched<[WriteVMSFSV, ReadVMSFSV, ReadVMask]>; } } } -multiclass VPseudoMaskNullaryV { +multiclass VPseudoVID_V { foreach m = MxList.m in { let VLMul = m.value in { - def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>; - def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>; + def "_V_" # m.MX : VPseudoNullaryNoMask<m.vrclass>, + Sched<[WriteVMIdxV, ReadVMask]>; + def "_V_" # m.MX # "_MASK" : VPseudoNullaryMask<m.vrclass>, + Sched<[WriteVMIdxV, ReadVMask]>; } } } @@ -1536,20 +1522,23 @@ multiclass VPseudoNullaryPseudoM <string BaseInst> { } } -multiclass VPseudoUnaryV_M { +multiclass VPseudoVIOT_M { defvar constraint = "@earlyclobber $rd"; foreach m = MxList.m in { let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>; - def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>; + def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, VR, constraint>, + Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; + def "_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, VR, constraint>, + Sched<[WriteVMIotV, ReadVMIotV, ReadVMask]>; } } } -multiclass VPseudoUnaryV_V_AnyMask { +multiclass VPseudoVCPR_V { foreach m = MxList.m in { let VLMul = m.value in - def _VM # "_" # m.MX : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>; + def _VM # "_" # m.MX : VPseudoUnaryAnyMask<m.vrclass, m.vrclass>, + Sched<[WriteVCompressV, ReadVCompressV, ReadVCompressV]>; } } @@ -1611,7 +1600,7 @@ multiclass VPseudoBinaryV_VV<string Constraint = ""> { defm _VV : VPseudoBinary<m.vrclass, m.vrclass, m.vrclass, m, Constraint>; } -multiclass VPseudoBinaryV_VV_EEW<int eew, string Constraint = ""> { +multiclass VPseudoVGTR_VV_EEW<int eew, string Constraint = ""> { foreach m = MxList.m in { foreach sew = EEWList in { defvar octuple_lmul = m.octuple; @@ -1620,7 +1609,8 @@ multiclass VPseudoBinaryV_VV_EEW<int eew, string Constraint = ""> { if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then { defvar emulMX = octuple_to_str<octuple_emul>.ret; defvar emul = !cast<LMULInfo>("V_" # emulMX); - defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint>; + defm _VV : VPseudoBinaryEmul<m.vrclass, m.vrclass, emul.vrclass, m, emul, Constraint>, + Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV]>; } } } @@ -1631,6 +1621,12 @@ multiclass VPseudoBinaryV_VX<string Constraint = ""> { defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>; } +multiclass VPseudoVSLD1_VX<string Constraint = ""> { + foreach m = MxList.m in + defm "_VX" : VPseudoBinary<m.vrclass, m.vrclass, GPR, m, Constraint>, + Sched<[WriteVISlide1X, ReadVISlideV, ReadVISlideX, ReadVMask]>; +} + multiclass VPseudoBinaryV_VF<string Constraint = ""> { foreach m = MxList.m in foreach f = FPList.fpinfo in @@ -1638,15 +1634,24 @@ multiclass VPseudoBinaryV_VF<string Constraint = ""> { f.fprclass, m, Constraint>; } +multiclass VPseudoVSLD1_VF<string Constraint = ""> { + foreach m = MxList.m in + foreach f = FPList.fpinfo in + defm "_V" # f.FX : + VPseudoBinary<m.vrclass, m.vrclass, f.fprclass, m, Constraint>, + Sched<[WriteVFSlide1F, ReadVFSlideV, ReadVFSlideF, ReadVMask]>; +} + multiclass VPseudoBinaryV_VI<Operand ImmType = simm5, string 
Constraint = ""> { foreach m = MxList.m in defm _VI : VPseudoBinary<m.vrclass, m.vrclass, ImmType, m, Constraint>; } -multiclass VPseudoBinaryM_MM { +multiclass VPseudoVALU_MM { foreach m = MxList.m in let VLMul = m.value in { - def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "">; + def "_MM_" # m.MX : VPseudoBinaryNoMask<VR, VR, VR, "">, + Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>; } } @@ -1744,12 +1749,13 @@ multiclass VPseudoBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1, m.vrclass, GPR, m, CarryIn, Constraint>; } -multiclass VPseudoBinaryV_FM { +multiclass VPseudoVMRG_FM { foreach m = MxList.m in foreach f = FPList.fpinfo in def "_V" # f.FX # "M_" # m.MX : VPseudoBinaryCarryIn<GetVRegNoV0<m.vrclass>.R, - m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">; + m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">, + Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>; } multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1, @@ -1762,76 +1768,102 @@ multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1, m.vrclass, simm5, m, CarryIn, Constraint>; } -multiclass VPseudoUnaryV_V_X_I_NoDummyMask { +multiclass VPseudoUnaryVMV_V_X_I { foreach m = MxList.m in { let VLMul = m.value in { - def "_V_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, m.vrclass>; - def "_X_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, GPR>; - def "_I_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, simm5>; + def "_V_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, m.vrclass>, + Sched<[WriteVIMovV, ReadVIMovV]>; + def "_X_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, GPR>, + Sched<[WriteVIMovX, ReadVIMovX]>; + def "_I_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, simm5>, + Sched<[WriteVIMovI]>; } } } -multiclass VPseudoUnaryV_F_NoDummyMask { +multiclass VPseudoVMV_F { foreach m = MxList.m in { foreach f = FPList.fpinfo in { let VLMul = m.value in { - def "_" # f.FX # "_" # m.MX : VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>; + def "_" # f.FX # "_" # m.MX : + VPseudoUnaryNoDummyMask<m.vrclass, f.fprclass>, + Sched<[WriteVFMovV, ReadVFMovF]>; } } } } -multiclass VPseudoUnaryTAV_V { +multiclass VPseudoVCLS_V { foreach m = MxList.m in { let VLMul = m.value in { - def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>; - def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>; + def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, + Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; + def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>, + Sched<[WriteVFClassV, ReadVFClassV, ReadVMask]>; } } } -multiclass VPseudoUnaryV_V { +multiclass VPseudoVSQR_V { foreach m = MxList.m in { let VLMul = m.value in { - def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>; - def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>; + def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, + Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; + def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>, + Sched<[WriteVFSqrtV, ReadVFSqrtV, ReadVMask]>; } } } -multiclass PseudoUnaryV_VF2 { +multiclass VPseudoVRCP_V { + foreach m = MxList.m in { + let VLMul = m.value in { + def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>, + Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; + def "_V_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.vrclass>, + Sched<[WriteVFRecpV, ReadVFRecpV, ReadVMask]>; + } + } +} + +multiclass PseudoVEXT_VF2 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF2.m in { let VLMul = m.value in { - def "_" # m.MX : 
VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>; - def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f2vrclass, - constraints>; + def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f2vrclass, constraints>, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_MASK" : + VPseudoUnaryMaskTA<m.vrclass, m.f2vrclass, constraints>, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; } } } -multiclass PseudoUnaryV_VF4 { +multiclass PseudoVEXT_VF4 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF4.m in { let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>; - def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f4vrclass, - constraints>; + def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f4vrclass, constraints>, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_MASK" : + VPseudoUnaryMaskTA<m.vrclass, m.f4vrclass, constraints>, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; } } } -multiclass PseudoUnaryV_VF8 { +multiclass PseudoVEXT_VF8 { defvar constraints = "@earlyclobber $rd"; foreach m = MxListVF8.m in { let VLMul = m.value in { - def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>; - def "_" # m.MX # "_MASK" : VPseudoUnaryMaskTA<m.vrclass, m.f8vrclass, - constraints>; + def "_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.f8vrclass, constraints>, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; + def "_" # m.MX # "_MASK" : + VPseudoUnaryMaskTA<m.vrclass, m.f8vrclass, constraints>, + Sched<[WriteVExtV, ReadVExtV, ReadVMask]>; } } } @@ -1874,30 +1906,172 @@ multiclass VPseudoBinaryM_VI { !if(!ge(m.octuple, 16), "@earlyclobber $rd", "")>; } -multiclass VPseudoBinaryV_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { - defm "" : VPseudoBinaryV_VV<Constraint>; - defm "" : VPseudoBinaryV_VX<Constraint>; - defm "" : VPseudoBinaryV_VI<ImmType, Constraint>; +multiclass VPseudoVGTR_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { + defm "" : VPseudoBinaryV_VV<Constraint>, + Sched<[WriteVGatherV, ReadVGatherV, ReadVGatherV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX<Constraint>, + Sched<[WriteVGatherX, ReadVGatherV, ReadVGatherX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI<ImmType, Constraint>, + Sched<[WriteVGatherI, ReadVGatherV, ReadVMask]>; } -multiclass VPseudoBinaryV_VV_VX { - defm "" : VPseudoBinaryV_VV; - defm "" : VPseudoBinaryV_VX; +multiclass VPseudoVSALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { + defm "" : VPseudoBinaryV_VV<Constraint>, + Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX<Constraint>, + Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI<ImmType, Constraint>, + Sched<[WriteVSALUI, ReadVSALUV, ReadVMask]>; } -multiclass VPseudoBinaryV_VV_VF { - defm "" : VPseudoBinaryV_VV; - defm "" : VPseudoBinaryV_VF; + +multiclass VPseudoVSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { + defm "" : VPseudoBinaryV_VV<Constraint>, + Sched<[WriteVShiftV, ReadVShiftV, ReadVShiftV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX<Constraint>, + Sched<[WriteVShiftX, ReadVShiftV, ReadVShiftX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI<ImmType, Constraint>, + Sched<[WriteVShiftI, ReadVShiftV, ReadVMask]>; } -multiclass VPseudoBinaryV_VX_VI<Operand ImmType = simm5> { - defm "" : VPseudoBinaryV_VX; - defm "" : VPseudoBinaryV_VI<ImmType>; +multiclass VPseudoVSSHT_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { + defm "" : VPseudoBinaryV_VV<Constraint>, + 
Sched<[WriteVSShiftV, ReadVSShiftV, ReadVSShiftV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX<Constraint>, + Sched<[WriteVSShiftX, ReadVSShiftV, ReadVSShiftX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI<ImmType, Constraint>, + Sched<[WriteVSShiftI, ReadVSShiftV, ReadVMask]>; } -multiclass VPseudoBinaryW_VV_VX { - defm "" : VPseudoBinaryW_VV; - defm "" : VPseudoBinaryW_VX; +multiclass VPseudoVALU_VV_VX_VI<Operand ImmType = simm5, string Constraint = ""> { + defm "" : VPseudoBinaryV_VV<Constraint>, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX<Constraint>, + Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; + defm "" : VPseudoBinaryV_VI<ImmType, Constraint>, + Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>; +} + +multiclass VPseudoVSALU_VV_VX { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVSALUV, ReadVSALUV, ReadVSALUV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX, + Sched<[WriteVSALUX, ReadVSALUV, ReadVSALUX, ReadVMask]>; +} + +multiclass VPseudoVSMUL_VV_VX { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVSMulV, ReadVSMulV, ReadVSMulV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX, + Sched<[WriteVSMulX, ReadVSMulV, ReadVSMulX, ReadVMask]>; +} + +multiclass VPseudoVAALU_VV_VX { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVAALUV, ReadVAALUV, ReadVAALUV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX, + Sched<[WriteVAALUX, ReadVAALUV, ReadVAALUX, ReadVMask]>; +} + +multiclass VPseudoVMINMAX_VV_VX { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX, + Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>; +} + +multiclass VPseudoVMUL_VV_VX { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVIMulV, ReadVIMulV, ReadVIMulV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX, + Sched<[WriteVIMulX, ReadVIMulV, ReadVIMulX, ReadVMask]>; +} + +multiclass VPseudoVDIV_VV_VX { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVIDivV, ReadVIDivV, ReadVIDivV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX, + Sched<[WriteVIDivX, ReadVIDivV, ReadVIDivX, ReadVMask]>; +} + +multiclass VPseudoVFMUL_VV_VF { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVFMulV, ReadVFMulV, ReadVFMulV, ReadVMask]>; + defm "" : VPseudoBinaryV_VF, + Sched<[WriteVFMulF, ReadVFMulV, ReadVFMulF, ReadVMask]>; +} + +multiclass VPseudoVFDIV_VV_VF { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVFDivV, ReadVFDivV, ReadVFDivV, ReadVMask]>; + defm "" : VPseudoBinaryV_VF, + Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; +} + +multiclass VPseudoVFRDIV_VF { + defm "" : VPseudoBinaryV_VF, + Sched<[WriteVFDivF, ReadVFDivV, ReadVFDivF, ReadVMask]>; +} + +multiclass VPseudoVALU_VV_VX { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVIALUV, ReadVIALUV, ReadVIALUV, ReadVMask]>; + defm "" : VPseudoBinaryV_VX, + Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>; +} + +multiclass VPseudoVSGNJ_VV_VF { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVFSgnjV, ReadVFSgnjV, ReadVFSgnjV, ReadVMask]>; + defm "" : VPseudoBinaryV_VF, + Sched<[WriteVFSgnjF, ReadVFSgnjV, ReadVFSgnjF, ReadVMask]>; +} + +multiclass VPseudoVMAX_VV_VF { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>; + defm "" : VPseudoBinaryV_VF, + Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>; +} + +multiclass VPseudoVALU_VV_VF { + defm "" : VPseudoBinaryV_VV, + Sched<[WriteVFALUV, ReadVFALUV, ReadVFALUV, ReadVMask]>; + defm "" : VPseudoBinaryV_VF, + Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>; +} + +multiclass VPseudoVALU_VF { + defm "" : 
+  defm "" : VPseudoBinaryV_VF,
+            Sched<[WriteVFALUF, ReadVFALUV, ReadVFALUF, ReadVMask]>;
+}
+
+multiclass VPseudoVALU_VX_VI<Operand ImmType = simm5> {
+  defm "" : VPseudoBinaryV_VX,
+            Sched<[WriteVIALUX, ReadVIALUV, ReadVIALUX, ReadVMask]>;
+  defm "" : VPseudoBinaryV_VI<ImmType>,
+            Sched<[WriteVIALUI, ReadVIALUV, ReadVMask]>;
+}
+
+multiclass VPseudoVWALU_VV_VX {
+  defm "" : VPseudoBinaryW_VV,
+            Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>;
+  defm "" : VPseudoBinaryW_VX,
+            Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>;
+}
+
+multiclass VPseudoVWMUL_VV_VX {
+  defm "" : VPseudoBinaryW_VV,
+            Sched<[WriteVIWMulV, ReadVIWMulV, ReadVIWMulV, ReadVMask]>;
+  defm "" : VPseudoBinaryW_VX,
+            Sched<[WriteVIWMulX, ReadVIWMulV, ReadVIWMulX, ReadVMask]>;
+}
+
+multiclass VPseudoVWMUL_VV_VF {
+  defm "" : VPseudoBinaryW_VV,
+            Sched<[WriteVFWMulV, ReadVFWMulV, ReadVFWMulV, ReadVMask]>;
+  defm "" : VPseudoBinaryW_VF,
+            Sched<[WriteVFWMulF, ReadVFWMulV, ReadVFWMulF, ReadVMask]>;
 }
 
 multiclass VPseudoBinaryW_VV_VF {
@@ -1905,53 +2079,100 @@ multiclass VPseudoBinaryW_VV_VF {
   defm "" : VPseudoBinaryW_VF;
 }
 
-multiclass VPseudoBinaryW_WV_WX {
-  defm "" : VPseudoBinaryW_WV;
-  defm "" : VPseudoBinaryW_WX;
+multiclass VPseudoVWALU_WV_WX {
+  defm "" : VPseudoBinaryW_WV,
+            Sched<[WriteVIWALUV, ReadVIWALUV, ReadVIWALUV, ReadVMask]>;
+  defm "" : VPseudoBinaryW_WX,
+            Sched<[WriteVIWALUX, ReadVIWALUV, ReadVIWALUX, ReadVMask]>;
+}
+
+multiclass VPseudoVFWALU_VV_VF {
+  defm "" : VPseudoBinaryW_VV,
+            Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
+  defm "" : VPseudoBinaryW_VF,
+            Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
+}
+
+multiclass VPseudoVFWALU_WV_WF {
+  defm "" : VPseudoBinaryW_WV,
+            Sched<[WriteVFWALUV, ReadVFWALUV, ReadVFWALUV, ReadVMask]>;
+  defm "" : VPseudoBinaryW_WF,
+            Sched<[WriteVFWALUF, ReadVFWALUV, ReadVFWALUF, ReadVMask]>;
+}
+
+multiclass VPseudoVMRG_VM_XM_IM {
+  defm "" : VPseudoBinaryV_VM,
+            Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>;
+  defm "" : VPseudoBinaryV_XM,
+            Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
+  defm "" : VPseudoBinaryV_IM,
+            Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryW_WV_WF {
-  defm "" : VPseudoBinaryW_WV;
-  defm "" : VPseudoBinaryW_WF;
+multiclass VPseudoVCALU_VM_XM_IM {
+  defm "" : VPseudoBinaryV_VM,
+            Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+  defm "" : VPseudoBinaryV_XM,
+            Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+  defm "" : VPseudoBinaryV_IM,
+            Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryV_VM_XM_IM {
-  defm "" : VPseudoBinaryV_VM;
-  defm "" : VPseudoBinaryV_XM;
-  defm "" : VPseudoBinaryV_IM;
+multiclass VPseudoVCALU_VM_XM {
+  defm "" : VPseudoBinaryV_VM,
+            Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+  defm "" : VPseudoBinaryV_XM,
+            Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryV_VM_XM {
-  defm "" : VPseudoBinaryV_VM;
-  defm "" : VPseudoBinaryV_XM;
+multiclass VPseudoVCALUM_VM_XM_IM<string Constraint> {
+  defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+            Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+  defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+            Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
+  defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+            Sched<[WriteVICALUI, ReadVIALUCV, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryM_VM_XM_IM<string Constraint> {
-  defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
-  defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
-  defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+multiclass VPseudoVCALUM_VM_XM<string Constraint> {
+  defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+            Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV, ReadVMask]>;
+  defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>,
+            Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryM_VM_XM<string Constraint> {
-  defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
-  defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/1, Constraint>;
+multiclass VPseudoVCALUM_V_X_I<string Constraint> {
+  defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+            Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+  defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+            Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
+  defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+            Sched<[WriteVICALUI, ReadVIALUCV]>;
 }
 
-multiclass VPseudoBinaryM_V_X_I<string Constraint> {
-  defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
-  defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
-  defm "" : VPseudoBinaryV_IM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+multiclass VPseudoVCALUM_V_X<string Constraint> {
+  defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+            Sched<[WriteVICALUV, ReadVIALUCV, ReadVIALUCV]>;
+  defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>,
+            Sched<[WriteVICALUX, ReadVIALUCV, ReadVIALUCX]>;
 }
 
-multiclass VPseudoBinaryM_V_X<string Constraint> {
-  defm "" : VPseudoBinaryV_VM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
-  defm "" : VPseudoBinaryV_XM</*CarryOut=*/1, /*CarryIn=*/0, Constraint>;
+multiclass VPseudoVNCLP_WV_WX_WI {
+  defm "" : VPseudoBinaryV_WV,
+            Sched<[WriteVNClipV, ReadVNClipV, ReadVNClipV, ReadVMask]>;
+  defm "" : VPseudoBinaryV_WX,
+            Sched<[WriteVNClipX, ReadVNClipV, ReadVNClipX, ReadVMask]>;
+  defm "" : VPseudoBinaryV_WI,
+            Sched<[WriteVNClipI, ReadVNClipV, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryV_WV_WX_WI {
-  defm "" : VPseudoBinaryV_WV;
-  defm "" : VPseudoBinaryV_WX;
-  defm "" : VPseudoBinaryV_WI;
+multiclass VPseudoVNSHT_WV_WX_WI {
+  defm "" : VPseudoBinaryV_WV,
+            Sched<[WriteVNShiftV, ReadVNShiftV, ReadVNShiftV, ReadVMask]>;
+  defm "" : VPseudoBinaryV_WX,
+            Sched<[WriteVNShiftX, ReadVNShiftV, ReadVNShiftX, ReadVMask]>;
+  defm "" : VPseudoBinaryV_WI,
+            Sched<[WriteVNShiftI, ReadVNShiftV, ReadVMask]>;
 }
 
 multiclass VPseudoTernary<VReg RetClass,
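Every hunk in this block leans on the same TableGen idiom: the `Sched<[...]>` list is attached to a `defm` (or `def`) as an extra parent class, and TableGen mixes the inherited fields into every record the multiclass expands to. A minimal, self-contained sketch of that mechanism — all names here are invented for illustration, not LLVM's — which `llvm-tblgen` can process on its own:

```
// Stand-in for LLVM's Sched<...> mixin: a class that only carries a field.
class Resource;
def WriteFoo : Resource;
def ReadFoo  : Resource;

class SchedLike<list<Resource> rws> {
  list<Resource> SchedRW = rws;
}

class PseudoBase<string mx> {
  string MX = mx;
}

multiclass PseudoFoo_V {
  foreach mx = ["M1", "M2"] in
    def "_" # mx : PseudoBase<mx>;
}

// The class listed after the multiclass is mixed into every generated def,
// so PseudoFoo_M1 and PseudoFoo_M2 both end up with the same SchedRW list.
defm PseudoFoo : PseudoFoo_V, SchedLike<[WriteFoo, ReadFoo]>;
```

The V/X/I/F suffixes in the renamed multiclasses mirror the operand kind, which is why each `defm` body above picks a matching Write/Read resource pair.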
@@ -2031,55 +2252,113 @@ multiclass VPseudoTernaryV_VI<Operand ImmType = simm5, string Constraint = ""> {
   defm _VI : VPseudoTernary<m.vrclass, m.vrclass, ImmType, m, Constraint>;
 }
 
-multiclass VPseudoTernaryV_VV_VX_AAXA<string Constraint = ""> {
-  defm "" : VPseudoTernaryV_VV_AAXA<Constraint>;
-  defm "" : VPseudoTernaryV_VX_AAXA<Constraint>;
+multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
+  defm "" : VPseudoTernaryV_VV_AAXA<Constraint>,
+            Sched<[WriteVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVIMulAddV, ReadVMask]>;
+  defm "" : VPseudoTernaryV_VX_AAXA<Constraint>,
+            Sched<[WriteVIMulAddX, ReadVIMulAddV, ReadVIMulAddV, ReadVIMulAddX, ReadVMask]>;
 }
 
-multiclass VPseudoTernaryV_VV_VF_AAXA<string Constraint = ""> {
-  defm "" : VPseudoTernaryV_VV_AAXA<Constraint>;
-  defm "" : VPseudoTernaryV_VF_AAXA<Constraint>;
+multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> {
+  defm "" : VPseudoTernaryV_VV_AAXA<Constraint>,
+            Sched<[WriteVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVFMulAddV, ReadVMask]>;
+  defm "" : VPseudoTernaryV_VF_AAXA<Constraint>,
+            Sched<[WriteVFMulAddF, ReadVFMulAddV, ReadVFMulAddV, ReadVFMulAddF, ReadVMask]>;
 }
 
-multiclass VPseudoTernaryV_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
-  defm "" : VPseudoTernaryV_VX<Constraint>;
-  defm "" : VPseudoTernaryV_VI<ImmType, Constraint>;
+multiclass VPseudoVSLD_VX_VI<Operand ImmType = simm5, string Constraint = ""> {
+  defm "" : VPseudoTernaryV_VX<Constraint>,
+            Sched<[WriteVISlideX, ReadVISlideV, ReadVISlideV, ReadVISlideX, ReadVMask]>;
+  defm "" : VPseudoTernaryV_VI<ImmType, Constraint>,
+            Sched<[WriteVISlideI, ReadVISlideV, ReadVISlideV, ReadVMask]>;
 }
 
-multiclass VPseudoTernaryW_VV_VX {
-  defm "" : VPseudoTernaryW_VV;
-  defm "" : VPseudoTernaryW_VX;
+multiclass VPseudoVWMAC_VV_VX {
+  defm "" : VPseudoTernaryW_VV,
+            Sched<[WriteVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddV, ReadVMask]>;
+  defm "" : VPseudoTernaryW_VX,
+            Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
 }
 
-multiclass VPseudoTernaryW_VV_VF {
-  defm "" : VPseudoTernaryW_VV;
-  defm "" : VPseudoTernaryW_VF;
+multiclass VPseudoVWMAC_VX {
+  defm "" : VPseudoTernaryW_VX,
+            Sched<[WriteVIWMulAddX, ReadVIWMulAddV, ReadVIWMulAddV, ReadVIWMulAddX, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryM_VV_VX_VI {
-  defm "" : VPseudoBinaryM_VV;
-  defm "" : VPseudoBinaryM_VX;
-  defm "" : VPseudoBinaryM_VI;
+multiclass VPseudoVWMAC_VV_VF {
+  defm "" : VPseudoTernaryW_VV,
+            Sched<[WriteVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddV, ReadVMask]>;
+  defm "" : VPseudoTernaryW_VF,
+            Sched<[WriteVFWMulAddF, ReadVFWMulAddV, ReadVFWMulAddV, ReadVFWMulAddF, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryM_VV_VX {
-  defm "" : VPseudoBinaryM_VV;
-  defm "" : VPseudoBinaryM_VX;
+multiclass VPseudoVCMPM_VV_VX_VI {
+  defm "" : VPseudoBinaryM_VV,
+            Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+  defm "" : VPseudoBinaryM_VX,
+            Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+  defm "" : VPseudoBinaryM_VI,
+            Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryM_VV_VF {
-  defm "" : VPseudoBinaryM_VV;
-  defm "" : VPseudoBinaryM_VF;
+multiclass VPseudoVCMPM_VV_VX {
+  defm "" : VPseudoBinaryM_VV,
+            Sched<[WriteVICmpV, ReadVICmpV, ReadVICmpV, ReadVMask]>;
+  defm "" : VPseudoBinaryM_VX,
+            Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
 }
 
-multiclass VPseudoBinaryM_VX_VI {
-  defm "" : VPseudoBinaryM_VX;
-  defm "" : VPseudoBinaryM_VI;
+multiclass VPseudoVCMPM_VV_VF {
+  defm "" : VPseudoBinaryM_VV,
+            Sched<[WriteVFCmpV, ReadVFCmpV, ReadVFCmpV, ReadVMask]>;
+  defm "" : VPseudoBinaryM_VF,
+            Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
 }
 
-multiclass VPseudoReductionV_VS {
+multiclass VPseudoVCMPM_VF {
+  defm "" : VPseudoBinaryM_VF,
+            Sched<[WriteVFCmpF, ReadVFCmpV, ReadVFCmpF, ReadVMask]>;
+}
+
+multiclass VPseudoVCMPM_VX_VI {
+  defm "" : VPseudoBinaryM_VX,
+            Sched<[WriteVICmpX, ReadVICmpV, ReadVICmpX, ReadVMask]>;
+  defm "" : VPseudoBinaryM_VI,
+            Sched<[WriteVICmpI, ReadVICmpV, ReadVMask]>;
+}
+
+multiclass VPseudoVRED_VS {
   foreach m = MxList.m in {
-    defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>;
+    defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+               Sched<[WriteVIRedV, ReadVIRedV, ReadVIRedV, ReadVIRedV, ReadVMask]>;
+  }
+}
+
+multiclass VPseudoVWRED_VS {
+  foreach m = MxList.m in {
+    defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+               Sched<[WriteVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVIWRedV, ReadVMask]>;
+  }
+}
+
+multiclass VPseudoVFRED_VS {
+  foreach m = MxList.m in {
+    defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+               Sched<[WriteVFRedV, ReadVFRedV, ReadVFRedV, ReadVFRedV, ReadVMask]>;
+  }
+}
+
+multiclass VPseudoVFREDO_VS {
+  foreach m = MxList.m in {
+    defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+               Sched<[WriteVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVFRedOV, ReadVMask]>;
+  }
+}
+
+multiclass VPseudoVFWRED_VS {
+  foreach m = MxList.m in {
+    defm _VS : VPseudoTernary<V_M1.vrclass, m.vrclass, V_M1.vrclass, m>,
+               Sched<[WriteVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVFWRedV, ReadVMask]>;
   }
 }
 
@@ -2094,9 +2373,16 @@ multiclass VPseudoConversion<VReg RetClass,
   }
 }
 
-multiclass VPseudoConversionV_V {
+multiclass VPseudoVCVTI_V {
+  foreach m = MxList.m in
+    defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
+              Sched<[WriteVFCvtFToIV, ReadVFCvtFToIV, ReadVMask]>;
+}
+
+multiclass VPseudoVCVTF_V {
   foreach m = MxList.m in
-    defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>;
+    defm _V : VPseudoConversion<m.vrclass, m.vrclass, m>,
+              Sched<[WriteVFCvtIToFV, ReadVFCvtIToFV, ReadVMask]>;
 }
 
 multiclass VPseudoConversionW_V {
@@ -2105,10 +2391,46 @@ multiclass VPseudoConversionW_V {
     defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>;
 }
 
-multiclass VPseudoConversionV_W {
+multiclass VPseudoVWCVTI_V {
+  defvar constraint = "@earlyclobber $rd";
+  foreach m = MxList.m[0-5] in
+    defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
+              Sched<[WriteVFWCvtFToIV, ReadVFWCvtFToIV, ReadVMask]>;
+}
+
+multiclass VPseudoVWCVTF_V {
+  defvar constraint = "@earlyclobber $rd";
+  foreach m = MxList.m[0-5] in
+    defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
+              Sched<[WriteVFWCvtIToFV, ReadVFWCvtIToFV, ReadVMask]>;
+}
+
+multiclass VPseudoVWCVTD_V {
+  defvar constraint = "@earlyclobber $rd";
+  foreach m = MxList.m[0-5] in
+    defm _V : VPseudoConversion<m.wvrclass, m.vrclass, m, constraint>,
+              Sched<[WriteVFWCvtFToFV, ReadVFWCvtFToFV, ReadVMask]>;
+}
+
+multiclass VPseudoVNCVTI_W {
+  defvar constraint = "@earlyclobber $rd";
+  foreach m = MxList.m[0-5] in
+    defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
+              Sched<[WriteVFNCvtFToIV, ReadVFNCvtFToIV, ReadVMask]>;
+}
+
+multiclass VPseudoVNCVTF_W {
+  defvar constraint = "@earlyclobber $rd";
+  foreach m = MxList.m[0-5] in
+    defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
+              Sched<[WriteVFNCvtIToFV, ReadVFNCvtIToFV, ReadVMask]>;
+}
+
+multiclass VPseudoVNCVTD_W {
   defvar constraint = "@earlyclobber $rd";
   foreach m = MxListW.m in
-    defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>;
+    defm _W : VPseudoConversion<m.vrclass, m.wvrclass, m, constraint>,
+              Sched<[WriteVFNCvtFToFV, ReadVFNCvtFToFV, ReadVMask]>;
 }
 
 multiclass VPseudoUSSegLoad<bit isFF> {
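The new widening and narrowing conversion multiclasses iterate `MxList.m[0-5]` rather than the whole list: TableGen's list-slice syntax keeps the first six LMUL settings and drops the last (M8), since the double-width side of these conversions at LMUL=8 would require a register group larger than the architecture provides. The slicing itself, as a standalone sketch with invented list contents:

```
// lst[0-5] is an inclusive index range: elements 0 through 5, M8 excluded.
defvar MxAll = ["MF8", "MF4", "MF2", "M1", "M2", "M4", "M8"];

foreach mx = MxAll[0-5] in
  def "Demo_" # mx;
```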
@@ -2543,42 +2865,6 @@ class VPatTernaryMask<string intrinsic,
                       (mask_type V0),
                       GPR:$vl, sew)>;
 
-class VPatAMOWDNoMask<string intrinsic_name,
-                      string inst,
-                      ValueType result_type,
-                      ValueType op1_type,
-                      int sew,
-                      LMULInfo vlmul,
-                      LMULInfo emul,
-                      VReg op1_reg_class> :
-  Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
-                    GPR:$rs1,
-                    (op1_type op1_reg_class:$vs2),
-                    (result_type vlmul.vrclass:$vd),
-                    VLOpFrag)),
-      (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX)
-       $rs1, $vs2, $vd,
-       GPR:$vl, sew)>;
-
-class VPatAMOWDMask<string intrinsic_name,
-                    string inst,
-                    ValueType result_type,
-                    ValueType op1_type,
-                    ValueType mask_type,
-                    int sew,
-                    LMULInfo vlmul,
-                    LMULInfo emul,
-                    VReg op1_reg_class> :
-  Pat<(result_type (!cast<Intrinsic>(intrinsic_name # "_mask")
-                    GPR:$rs1,
-                    (op1_type op1_reg_class:$vs2),
-                    (result_type vlmul.vrclass:$vd),
-                    (mask_type V0),
-                    VLOpFrag)),
-      (!cast<Instruction>(inst # "_WD_" # vlmul.MX # "_" # emul.MX # "_MASK")
-       $rs1, $vs2, $vd,
-       (mask_type V0), GPR:$vl, sew)>;
-
 multiclass VPatUnaryS_M<string intrinsic,
                         string inst>
 {
@@ -3416,44 +3702,6 @@ multiclass VPatConversionVF_WF <string intrinsic, string instruction> {
   }
 }
 
-multiclass VPatAMOWD<string intrinsic,
-                     string inst,
-                     ValueType result_type,
-                     ValueType offset_type,
-                     ValueType mask_type,
-                     int sew,
-                     LMULInfo vlmul,
-                     LMULInfo emul,
-                     VReg op1_reg_class>
-{
-  def : VPatAMOWDNoMask<intrinsic, inst, result_type, offset_type,
-                        sew, vlmul, emul, op1_reg_class>;
-  def : VPatAMOWDMask<intrinsic, inst, result_type, offset_type,
-                      mask_type, sew, vlmul, emul, op1_reg_class>;
-}
-
-multiclass VPatAMOV_WD<string intrinsic,
-                       string inst,
-                       list<VTypeInfo> vtilist> {
-  foreach eew = EEWList in {
-    foreach vti = vtilist in {
-      if !or(!eq(vti.SEW, 32), !eq(vti.SEW, 64)) then {
-        defvar octuple_lmul = vti.LMul.octuple;
-        // Calculate emul = eew * lmul / sew
-        defvar octuple_emul = !srl(!mul(eew, octuple_lmul), vti.Log2SEW);
-        if !and(!ge(octuple_emul, 1), !le(octuple_emul, 64)) then {
-          defvar emulMX = octuple_to_str<octuple_emul>.ret;
-          defvar offsetVti = !cast<VTypeInfo>("VI" # eew # emulMX);
-          defvar inst_ei = inst # "EI" # eew;
-          defm : VPatAMOWD<intrinsic, inst_ei,
-                           vti.Vector, offsetVti.Vector,
-                           vti.Mask, vti.Log2SEW, vti.LMul, offsetVti.LMul, offsetVti.RegClass>;
-        }
-      }
-    }
-  }
-}
-
 //===----------------------------------------------------------------------===//
 // Pseudo instructions
 //===----------------------------------------------------------------------===//
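The `VPatAMOV_WD` multiclass deleted above sized its index operand with EMUL = EEW × LMUL / SEW, carried out in eighths (`octuple`) so that fractional LMULs stay integer-valued. A worked instance of that arithmetic as standalone TableGen, with the operand values picked purely for illustration:

```
// EEW = 32, LMUL = 1 (octuple 8), SEW = 64 (Log2SEW = 6):
defvar OctupleLMUL = 8;
defvar OctupleEMUL = !srl(!mul(32, OctupleLMUL), 6);  // 256 >> 6 = 4

// Octuple 4 means EMUL = 4/8 = 1/2, i.e. 32 * 1 / 64 -- inside the
// legal [1, 64] octuple range the deleted code checked for.
def EmulDemo {
  int emul_octuple = OctupleEMUL;
}
```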
@@ -3531,11 +3779,13 @@ def PseudoVSETIVLI : Pseudo<(outs GPR:$rd), (ins uimm5:$rs1, VTypeIOp:$vtypei),
 //===----------------------------------------------------------------------===//
 
 // Pseudos Unit-Stride Loads and Stores
-defm PseudoVL : VPseudoUSLoad</*isFF=*/false>;
+defm PseudoVL : VPseudoUSLoad;
 defm PseudoVS : VPseudoUSStore;
 
-defm PseudoVLM : VPseudoLoadMask;
-defm PseudoVSM : VPseudoStoreMask;
+defm PseudoVLM : VPseudoLoadMask,
+                 Sched<[WriteVLDM, ReadVLDX]>;
+defm PseudoVSM : VPseudoStoreMask,
+                 Sched<[WriteVSTM, ReadVSTX]>;
 
 //===----------------------------------------------------------------------===//
 // 7.5 Vector Strided Instructions
 //===----------------------------------------------------------------------===//
 
@@ -3561,7 +3811,7 @@ defm PseudoVSUX : VPseudoIStore</*Ordered=*/false>;
 
 // vleff may update VL register
 let hasSideEffects = 1, Defs = [VL] in
-defm PseudoVL : VPseudoUSLoad</*isFF=*/true>;
+defm PseudoVL : VPseudoFFLoad;
 
 //===----------------------------------------------------------------------===//
 // 7.8. Vector Load/Store Segment Instructions
@@ -3580,28 +3830,15 @@ let hasSideEffects = 1, Defs = [VL] in
 defm PseudoVLSEG : VPseudoUSSegLoad</*isFF=*/true>;
 
 //===----------------------------------------------------------------------===//
-// 8. Vector AMO Operations
-//===----------------------------------------------------------------------===//
-defm PseudoVAMOSWAP : VPseudoAMO;
-defm PseudoVAMOADD : VPseudoAMO;
-defm PseudoVAMOXOR : VPseudoAMO;
-defm PseudoVAMOAND : VPseudoAMO;
-defm PseudoVAMOOR : VPseudoAMO;
-defm PseudoVAMOMIN : VPseudoAMO;
-defm PseudoVAMOMAX : VPseudoAMO;
-defm PseudoVAMOMINU : VPseudoAMO;
-defm PseudoVAMOMAXU : VPseudoAMO;
-
-//===----------------------------------------------------------------------===//
 // 12. Vector Integer Arithmetic Instructions
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
 // 12.1. Vector Single-Width Integer Add and Subtract
 //===----------------------------------------------------------------------===//
-defm PseudoVADD : VPseudoBinaryV_VV_VX_VI;
-defm PseudoVSUB : VPseudoBinaryV_VV_VX;
-defm PseudoVRSUB : VPseudoBinaryV_VX_VI;
+defm PseudoVADD : VPseudoVALU_VV_VX_VI;
+defm PseudoVSUB : VPseudoVALU_VV_VX;
+defm PseudoVRSUB : VPseudoVALU_VX_VI;
 
 foreach vti = AllIntegerVectors in {
   // Match vrsub with 2 vector operands to vsub.vv by swapping operands. This
@@ -3657,166 +3894,166 @@ foreach vti = AllIntegerVectors in {
 //===----------------------------------------------------------------------===//
 // 12.2. Vector Widening Integer Add/Subtract
 //===----------------------------------------------------------------------===//
-defm PseudoVWADDU : VPseudoBinaryW_VV_VX;
-defm PseudoVWSUBU : VPseudoBinaryW_VV_VX;
-defm PseudoVWADD : VPseudoBinaryW_VV_VX;
-defm PseudoVWSUB : VPseudoBinaryW_VV_VX;
-defm PseudoVWADDU : VPseudoBinaryW_WV_WX;
-defm PseudoVWSUBU : VPseudoBinaryW_WV_WX;
-defm PseudoVWADD : VPseudoBinaryW_WV_WX;
-defm PseudoVWSUB : VPseudoBinaryW_WV_WX;
+defm PseudoVWADDU : VPseudoVWALU_VV_VX;
+defm PseudoVWSUBU : VPseudoVWALU_VV_VX;
+defm PseudoVWADD : VPseudoVWALU_VV_VX;
+defm PseudoVWSUB : VPseudoVWALU_VV_VX;
+defm PseudoVWADDU : VPseudoVWALU_WV_WX;
+defm PseudoVWSUBU : VPseudoVWALU_WV_WX;
+defm PseudoVWADD : VPseudoVWALU_WV_WX;
+defm PseudoVWSUB : VPseudoVWALU_WV_WX;
 
 //===----------------------------------------------------------------------===//
 // 12.3. Vector Integer Extension
 //===----------------------------------------------------------------------===//
-defm PseudoVZEXT_VF2 : PseudoUnaryV_VF2;
-defm PseudoVZEXT_VF4 : PseudoUnaryV_VF4;
-defm PseudoVZEXT_VF8 : PseudoUnaryV_VF8;
-defm PseudoVSEXT_VF2 : PseudoUnaryV_VF2;
-defm PseudoVSEXT_VF4 : PseudoUnaryV_VF4;
-defm PseudoVSEXT_VF8 : PseudoUnaryV_VF8;
+defm PseudoVZEXT_VF2 : PseudoVEXT_VF2;
+defm PseudoVZEXT_VF4 : PseudoVEXT_VF4;
+defm PseudoVZEXT_VF8 : PseudoVEXT_VF8;
+defm PseudoVSEXT_VF2 : PseudoVEXT_VF2;
+defm PseudoVSEXT_VF4 : PseudoVEXT_VF4;
+defm PseudoVSEXT_VF8 : PseudoVEXT_VF8;
 
 //===----------------------------------------------------------------------===//
 // 12.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVADC : VPseudoBinaryV_VM_XM_IM;
-defm PseudoVMADC : VPseudoBinaryM_VM_XM_IM<"@earlyclobber $rd">;
-defm PseudoVMADC : VPseudoBinaryM_V_X_I<"@earlyclobber $rd">;
+defm PseudoVADC : VPseudoVCALU_VM_XM_IM;
+defm PseudoVMADC : VPseudoVCALUM_VM_XM_IM<"@earlyclobber $rd">;
+defm PseudoVMADC : VPseudoVCALUM_V_X_I<"@earlyclobber $rd">;
 
-defm PseudoVSBC : VPseudoBinaryV_VM_XM;
-defm PseudoVMSBC : VPseudoBinaryM_VM_XM<"@earlyclobber $rd">;
-defm PseudoVMSBC : VPseudoBinaryM_V_X<"@earlyclobber $rd">;
+defm PseudoVSBC : VPseudoVCALU_VM_XM;
+defm PseudoVMSBC : VPseudoVCALUM_VM_XM<"@earlyclobber $rd">;
+defm PseudoVMSBC : VPseudoVCALUM_V_X<"@earlyclobber $rd">;
 
 //===----------------------------------------------------------------------===//
 // 12.5. Vector Bitwise Logical Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVAND : VPseudoBinaryV_VV_VX_VI;
-defm PseudoVOR : VPseudoBinaryV_VV_VX_VI;
-defm PseudoVXOR : VPseudoBinaryV_VV_VX_VI;
+defm PseudoVAND : VPseudoVALU_VV_VX_VI;
+defm PseudoVOR : VPseudoVALU_VV_VX_VI;
+defm PseudoVXOR : VPseudoVALU_VV_VX_VI;
 
 //===----------------------------------------------------------------------===//
 // 12.6. Vector Single-Width Bit Shift Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVSLL : VPseudoBinaryV_VV_VX_VI<uimm5>;
-defm PseudoVSRL : VPseudoBinaryV_VV_VX_VI<uimm5>;
-defm PseudoVSRA : VPseudoBinaryV_VV_VX_VI<uimm5>;
+defm PseudoVSLL : VPseudoVSHT_VV_VX_VI<uimm5>;
+defm PseudoVSRL : VPseudoVSHT_VV_VX_VI<uimm5>;
+defm PseudoVSRA : VPseudoVSHT_VV_VX_VI<uimm5>;
 
 //===----------------------------------------------------------------------===//
 // 12.7. Vector Narrowing Integer Right Shift Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVNSRL : VPseudoBinaryV_WV_WX_WI;
-defm PseudoVNSRA : VPseudoBinaryV_WV_WX_WI;
+defm PseudoVNSRL : VPseudoVNSHT_WV_WX_WI;
+defm PseudoVNSRA : VPseudoVNSHT_WV_WX_WI;
 
 //===----------------------------------------------------------------------===//
 // 12.8. Vector Integer Comparison Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVMSEQ : VPseudoBinaryM_VV_VX_VI;
-defm PseudoVMSNE : VPseudoBinaryM_VV_VX_VI;
-defm PseudoVMSLTU : VPseudoBinaryM_VV_VX;
-defm PseudoVMSLT : VPseudoBinaryM_VV_VX;
-defm PseudoVMSLEU : VPseudoBinaryM_VV_VX_VI;
-defm PseudoVMSLE : VPseudoBinaryM_VV_VX_VI;
-defm PseudoVMSGTU : VPseudoBinaryM_VX_VI;
-defm PseudoVMSGT : VPseudoBinaryM_VX_VI;
+defm PseudoVMSEQ : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSNE : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSLTU : VPseudoVCMPM_VV_VX;
+defm PseudoVMSLT : VPseudoVCMPM_VV_VX;
+defm PseudoVMSLEU : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSLE : VPseudoVCMPM_VV_VX_VI;
+defm PseudoVMSGTU : VPseudoVCMPM_VX_VI;
+defm PseudoVMSGT : VPseudoVCMPM_VX_VI;
 
 //===----------------------------------------------------------------------===//
 // 12.9. Vector Integer Min/Max Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVMINU : VPseudoBinaryV_VV_VX;
-defm PseudoVMIN : VPseudoBinaryV_VV_VX;
-defm PseudoVMAXU : VPseudoBinaryV_VV_VX;
-defm PseudoVMAX : VPseudoBinaryV_VV_VX;
+defm PseudoVMINU : VPseudoVMINMAX_VV_VX;
+defm PseudoVMIN : VPseudoVMINMAX_VV_VX;
+defm PseudoVMAXU : VPseudoVMINMAX_VV_VX;
+defm PseudoVMAX : VPseudoVMINMAX_VV_VX;
 
 //===----------------------------------------------------------------------===//
 // 12.10. Vector Single-Width Integer Multiply Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVMUL : VPseudoBinaryV_VV_VX;
-defm PseudoVMULH : VPseudoBinaryV_VV_VX;
-defm PseudoVMULHU : VPseudoBinaryV_VV_VX;
-defm PseudoVMULHSU : VPseudoBinaryV_VV_VX;
+defm PseudoVMUL : VPseudoVMUL_VV_VX;
+defm PseudoVMULH : VPseudoVMUL_VV_VX;
+defm PseudoVMULHU : VPseudoVMUL_VV_VX;
+defm PseudoVMULHSU : VPseudoVMUL_VV_VX;
 
 //===----------------------------------------------------------------------===//
 // 12.11. Vector Integer Divide Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVDIVU : VPseudoBinaryV_VV_VX;
-defm PseudoVDIV : VPseudoBinaryV_VV_VX;
-defm PseudoVREMU : VPseudoBinaryV_VV_VX;
-defm PseudoVREM : VPseudoBinaryV_VV_VX;
+defm PseudoVDIVU : VPseudoVDIV_VV_VX;
+defm PseudoVDIV : VPseudoVDIV_VV_VX;
+defm PseudoVREMU : VPseudoVDIV_VV_VX;
+defm PseudoVREM : VPseudoVDIV_VV_VX;
 
 //===----------------------------------------------------------------------===//
 // 12.12. Vector Widening Integer Multiply Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVWMUL : VPseudoBinaryW_VV_VX;
-defm PseudoVWMULU : VPseudoBinaryW_VV_VX;
-defm PseudoVWMULSU : VPseudoBinaryW_VV_VX;
+defm PseudoVWMUL : VPseudoVWMUL_VV_VX;
+defm PseudoVWMULU : VPseudoVWMUL_VV_VX;
+defm PseudoVWMULSU : VPseudoVWMUL_VV_VX;
 
 //===----------------------------------------------------------------------===//
 // 12.13. Vector Single-Width Integer Multiply-Add Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVMACC : VPseudoTernaryV_VV_VX_AAXA;
-defm PseudoVNMSAC : VPseudoTernaryV_VV_VX_AAXA;
-defm PseudoVMADD : VPseudoTernaryV_VV_VX_AAXA;
-defm PseudoVNMSUB : VPseudoTernaryV_VV_VX_AAXA;
+defm PseudoVMACC : VPseudoVMAC_VV_VX_AAXA;
+defm PseudoVNMSAC : VPseudoVMAC_VV_VX_AAXA;
+defm PseudoVMADD : VPseudoVMAC_VV_VX_AAXA;
+defm PseudoVNMSUB : VPseudoVMAC_VV_VX_AAXA;
 
 //===----------------------------------------------------------------------===//
 // 12.14. Vector Widening Integer Multiply-Add Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVWMACCU : VPseudoTernaryW_VV_VX;
-defm PseudoVWMACC : VPseudoTernaryW_VV_VX;
-defm PseudoVWMACCSU : VPseudoTernaryW_VV_VX;
-defm PseudoVWMACCUS : VPseudoTernaryW_VX;
+defm PseudoVWMACCU : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACC : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACCSU : VPseudoVWMAC_VV_VX;
+defm PseudoVWMACCUS : VPseudoVWMAC_VX;
 
 //===----------------------------------------------------------------------===//
 // 12.15. Vector Integer Merge Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVMERGE : VPseudoBinaryV_VM_XM_IM;
+defm PseudoVMERGE : VPseudoVMRG_VM_XM_IM;
 
 //===----------------------------------------------------------------------===//
 // 12.16. Vector Integer Move Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVMV_V : VPseudoUnaryV_V_X_I_NoDummyMask;
+defm PseudoVMV_V : VPseudoUnaryVMV_V_X_I;
 
 //===----------------------------------------------------------------------===//
 // 13.1. Vector Single-Width Saturating Add and Subtract
 //===----------------------------------------------------------------------===//
 let Defs = [VXSAT], hasSideEffects = 1 in {
-  defm PseudoVSADDU : VPseudoBinaryV_VV_VX_VI;
-  defm PseudoVSADD : VPseudoBinaryV_VV_VX_VI;
-  defm PseudoVSSUBU : VPseudoBinaryV_VV_VX;
-  defm PseudoVSSUB : VPseudoBinaryV_VV_VX;
+  defm PseudoVSADDU : VPseudoVSALU_VV_VX_VI;
+  defm PseudoVSADD : VPseudoVSALU_VV_VX_VI;
+  defm PseudoVSSUBU : VPseudoVSALU_VV_VX;
  defm PseudoVSSUB : VPseudoVSALU_VV_VX;
 }
 
 //===----------------------------------------------------------------------===//
 // 13.2. Vector Single-Width Averaging Add and Subtract
 //===----------------------------------------------------------------------===//
 let Uses = [VXRM], hasSideEffects = 1 in {
-  defm PseudoVAADDU : VPseudoBinaryV_VV_VX;
-  defm PseudoVAADD : VPseudoBinaryV_VV_VX;
-  defm PseudoVASUBU : VPseudoBinaryV_VV_VX;
-  defm PseudoVASUB : VPseudoBinaryV_VV_VX;
+  defm PseudoVAADDU : VPseudoVAALU_VV_VX;
+  defm PseudoVAADD : VPseudoVAALU_VV_VX;
+  defm PseudoVASUBU : VPseudoVAALU_VV_VX;
+  defm PseudoVASUB : VPseudoVAALU_VV_VX;
 }
 
 //===----------------------------------------------------------------------===//
 // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
 //===----------------------------------------------------------------------===//
 let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
-  defm PseudoVSMUL : VPseudoBinaryV_VV_VX;
+  defm PseudoVSMUL : VPseudoVSMUL_VV_VX;
 }
 
 //===----------------------------------------------------------------------===//
 // 13.4. Vector Single-Width Scaling Shift Instructions
 //===----------------------------------------------------------------------===//
 let Uses = [VXRM], hasSideEffects = 1 in {
-  defm PseudoVSSRL : VPseudoBinaryV_VV_VX_VI<uimm5>;
-  defm PseudoVSSRA : VPseudoBinaryV_VV_VX_VI<uimm5>;
+  defm PseudoVSSRL : VPseudoVSSHT_VV_VX_VI<uimm5>;
+  defm PseudoVSSRA : VPseudoVSSHT_VV_VX_VI<uimm5>;
 }
 
 //===----------------------------------------------------------------------===//
 // 13.5. Vector Narrowing Fixed-Point Clip Instructions
 //===----------------------------------------------------------------------===//
 let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in {
-  defm PseudoVNCLIP : VPseudoBinaryV_WV_WX_WI;
-  defm PseudoVNCLIPU : VPseudoBinaryV_WV_WX_WI;
+  defm PseudoVNCLIP : VPseudoVNCLP_WV_WX_WI;
+  defm PseudoVNCLIPU : VPseudoVNCLP_WV_WX_WI;
 }
 
 } // Predicates = [HasVInstructions]
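Throughout the fixed-point sections just above, a top-level `let Uses = [VXRM], Defs = [VXSAT], hasSideEffects = 1 in { ... }` region pins the same fields on every `defm` inside it, so the rounding-mode and saturation-flag dependences are stated once per section rather than once per instruction. The scoping rule in miniature, with invented record shapes (standalone, processable by `llvm-tblgen`):

```
class InstBase {
  list<string> Uses = [];
  bit hasSideEffects = 0;
}

multiclass MakeOne {
  def "_demo" : InstBase;
}

// Both OpA_demo and OpB_demo get Uses = ["VXRM"] and hasSideEffects = 1.
let Uses = ["VXRM"], hasSideEffects = 1 in {
  defm OpA : MakeOne;
  defm OpB : MakeOne;
}
```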
@@ -3825,156 +4062,156 @@ let Predicates = [HasVInstructionsAnyF] in {
 //===----------------------------------------------------------------------===//
 // 14.2. Vector Single-Width Floating-Point Add/Subtract Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFADD : VPseudoBinaryV_VV_VF;
-defm PseudoVFSUB : VPseudoBinaryV_VV_VF;
-defm PseudoVFRSUB : VPseudoBinaryV_VF;
+defm PseudoVFADD : VPseudoVALU_VV_VF;
+defm PseudoVFSUB : VPseudoVALU_VV_VF;
+defm PseudoVFRSUB : VPseudoVALU_VF;
 
 //===----------------------------------------------------------------------===//
 // 14.3. Vector Widening Floating-Point Add/Subtract Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFWADD : VPseudoBinaryW_VV_VF;
-defm PseudoVFWSUB : VPseudoBinaryW_VV_VF;
-defm PseudoVFWADD : VPseudoBinaryW_WV_WF;
-defm PseudoVFWSUB : VPseudoBinaryW_WV_WF;
+defm PseudoVFWADD : VPseudoVFWALU_VV_VF;
+defm PseudoVFWSUB : VPseudoVFWALU_VV_VF;
+defm PseudoVFWADD : VPseudoVFWALU_WV_WF;
+defm PseudoVFWSUB : VPseudoVFWALU_WV_WF;
 
 //===----------------------------------------------------------------------===//
 // 14.4. Vector Single-Width Floating-Point Multiply/Divide Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFMUL : VPseudoBinaryV_VV_VF;
-defm PseudoVFDIV : VPseudoBinaryV_VV_VF;
-defm PseudoVFRDIV : VPseudoBinaryV_VF;
+defm PseudoVFMUL : VPseudoVFMUL_VV_VF;
+defm PseudoVFDIV : VPseudoVFDIV_VV_VF;
+defm PseudoVFRDIV : VPseudoVFRDIV_VF;
 
 //===----------------------------------------------------------------------===//
 // 14.5. Vector Widening Floating-Point Multiply
 //===----------------------------------------------------------------------===//
-defm PseudoVFWMUL : VPseudoBinaryW_VV_VF;
+defm PseudoVFWMUL : VPseudoVWMUL_VV_VF;
 
 //===----------------------------------------------------------------------===//
 // 14.6. Vector Single-Width Floating-Point Fused Multiply-Add Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFMACC : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFNMACC : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFMSAC : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFNMSAC : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFMADD : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFNMADD : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFMSUB : VPseudoTernaryV_VV_VF_AAXA;
-defm PseudoVFNMSUB : VPseudoTernaryV_VV_VF_AAXA;
+defm PseudoVFMACC : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFNMACC : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFMSAC : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFNMSAC : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFMADD : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFNMADD : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFMSUB : VPseudoVMAC_VV_VF_AAXA;
+defm PseudoVFNMSUB : VPseudoVMAC_VV_VF_AAXA;
 
 //===----------------------------------------------------------------------===//
 // 14.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFWMACC : VPseudoTernaryW_VV_VF;
-defm PseudoVFWNMACC : VPseudoTernaryW_VV_VF;
-defm PseudoVFWMSAC : VPseudoTernaryW_VV_VF;
-defm PseudoVFWNMSAC : VPseudoTernaryW_VV_VF;
+defm PseudoVFWMACC : VPseudoVWMAC_VV_VF;
+defm PseudoVFWNMACC : VPseudoVWMAC_VV_VF;
+defm PseudoVFWMSAC : VPseudoVWMAC_VV_VF;
+defm PseudoVFWNMSAC : VPseudoVWMAC_VV_VF;
 
 //===----------------------------------------------------------------------===//
 // 14.8. Vector Floating-Point Square-Root Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFSQRT : VPseudoUnaryTAV_V;
+defm PseudoVFSQRT : VPseudoVSQR_V;
 
 //===----------------------------------------------------------------------===//
 // 14.9. Vector Floating-Point Reciprocal Square-Root Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFRSQRT7 : VPseudoUnaryTAV_V;
+defm PseudoVFRSQRT7 : VPseudoVRCP_V;
 
 //===----------------------------------------------------------------------===//
 // 14.10. Vector Floating-Point Reciprocal Estimate Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFREC7 : VPseudoUnaryTAV_V;
+defm PseudoVFREC7 : VPseudoVRCP_V;
 
 //===----------------------------------------------------------------------===//
 // 14.11. Vector Floating-Point Min/Max Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFMIN : VPseudoBinaryV_VV_VF;
-defm PseudoVFMAX : VPseudoBinaryV_VV_VF;
+defm PseudoVFMIN : VPseudoVMAX_VV_VF;
+defm PseudoVFMAX : VPseudoVMAX_VV_VF;
 
 //===----------------------------------------------------------------------===//
 // 14.12. Vector Floating-Point Sign-Injection Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFSGNJ : VPseudoBinaryV_VV_VF;
-defm PseudoVFSGNJN : VPseudoBinaryV_VV_VF;
-defm PseudoVFSGNJX : VPseudoBinaryV_VV_VF;
+defm PseudoVFSGNJ : VPseudoVSGNJ_VV_VF;
+defm PseudoVFSGNJN : VPseudoVSGNJ_VV_VF;
+defm PseudoVFSGNJX : VPseudoVSGNJ_VV_VF;
 
 //===----------------------------------------------------------------------===//
 // 14.13. Vector Floating-Point Compare Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVMFEQ : VPseudoBinaryM_VV_VF;
-defm PseudoVMFNE : VPseudoBinaryM_VV_VF;
-defm PseudoVMFLT : VPseudoBinaryM_VV_VF;
-defm PseudoVMFLE : VPseudoBinaryM_VV_VF;
-defm PseudoVMFGT : VPseudoBinaryM_VF;
-defm PseudoVMFGE : VPseudoBinaryM_VF;
+defm PseudoVMFEQ : VPseudoVCMPM_VV_VF;
+defm PseudoVMFNE : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLT : VPseudoVCMPM_VV_VF;
+defm PseudoVMFLE : VPseudoVCMPM_VV_VF;
+defm PseudoVMFGT : VPseudoVCMPM_VF;
+defm PseudoVMFGE : VPseudoVCMPM_VF;
 
 //===----------------------------------------------------------------------===//
 // 14.14. Vector Floating-Point Classify Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFCLASS : VPseudoUnaryV_V;
+defm PseudoVFCLASS : VPseudoVCLS_V;
 
 //===----------------------------------------------------------------------===//
 // 14.15. Vector Floating-Point Merge Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFMERGE : VPseudoBinaryV_FM;
+defm PseudoVFMERGE : VPseudoVMRG_FM;
 
 //===----------------------------------------------------------------------===//
 // 14.16. Vector Floating-Point Move Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVFMV_V : VPseudoUnaryV_F_NoDummyMask;
+defm PseudoVFMV_V : VPseudoVMV_F;
 
 //===----------------------------------------------------------------------===//
 // 14.17. Single-Width Floating-Point/Integer Type-Convert Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFCVT_XU_F : VPseudoConversionV_V;
-defm PseudoVFCVT_X_F : VPseudoConversionV_V;
-defm PseudoVFCVT_RTZ_XU_F : VPseudoConversionV_V;
-defm PseudoVFCVT_RTZ_X_F : VPseudoConversionV_V;
-defm PseudoVFCVT_F_XU : VPseudoConversionV_V;
-defm PseudoVFCVT_F_X : VPseudoConversionV_V;
+defm PseudoVFCVT_XU_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RTZ_XU_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_RTZ_X_F : VPseudoVCVTI_V;
+defm PseudoVFCVT_F_XU : VPseudoVCVTF_V;
+defm PseudoVFCVT_F_X : VPseudoVCVTF_V;
 
 //===----------------------------------------------------------------------===//
 // 14.18. Widening Floating-Point/Integer Type-Convert Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFWCVT_XU_F : VPseudoConversionW_V;
-defm PseudoVFWCVT_X_F : VPseudoConversionW_V;
-defm PseudoVFWCVT_RTZ_XU_F : VPseudoConversionW_V;
-defm PseudoVFWCVT_RTZ_X_F : VPseudoConversionW_V;
-defm PseudoVFWCVT_F_XU : VPseudoConversionW_V;
-defm PseudoVFWCVT_F_X : VPseudoConversionW_V;
-defm PseudoVFWCVT_F_F : VPseudoConversionW_V;
+defm PseudoVFWCVT_XU_F : VPseudoVWCVTI_V;
+defm PseudoVFWCVT_X_F : VPseudoVWCVTI_V;
+defm PseudoVFWCVT_RTZ_XU_F : VPseudoVWCVTI_V;
+defm PseudoVFWCVT_RTZ_X_F : VPseudoVWCVTI_V;
+defm PseudoVFWCVT_F_XU : VPseudoVWCVTF_V;
+defm PseudoVFWCVT_F_X : VPseudoVWCVTF_V;
+defm PseudoVFWCVT_F_F : VPseudoVWCVTD_V;
 
 //===----------------------------------------------------------------------===//
 // 14.19. Narrowing Floating-Point/Integer Type-Convert Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFNCVT_XU_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_X_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_RTZ_XU_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_RTZ_X_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_F_XU : VPseudoConversionV_W;
-defm PseudoVFNCVT_F_X : VPseudoConversionV_W;
-defm PseudoVFNCVT_F_F : VPseudoConversionV_W;
-defm PseudoVFNCVT_ROD_F_F : VPseudoConversionV_W;
+defm PseudoVFNCVT_XU_F : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_X_F : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_RTZ_XU_F : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_RTZ_X_F : VPseudoVNCVTI_W;
+defm PseudoVFNCVT_F_XU : VPseudoVNCVTF_W;
+defm PseudoVFNCVT_F_X : VPseudoVNCVTF_W;
+defm PseudoVFNCVT_F_F : VPseudoVNCVTD_W;
+defm PseudoVFNCVT_ROD_F_F : VPseudoVNCVTD_W;
 } // Predicates = [HasVInstructionsAnyF]
 
 let Predicates = [HasVInstructions] in {
 //===----------------------------------------------------------------------===//
 // 15.1. Vector Single-Width Integer Reduction Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVREDSUM : VPseudoReductionV_VS;
-defm PseudoVREDAND : VPseudoReductionV_VS;
-defm PseudoVREDOR : VPseudoReductionV_VS;
-defm PseudoVREDXOR : VPseudoReductionV_VS;
-defm PseudoVREDMINU : VPseudoReductionV_VS;
-defm PseudoVREDMIN : VPseudoReductionV_VS;
-defm PseudoVREDMAXU : VPseudoReductionV_VS;
-defm PseudoVREDMAX : VPseudoReductionV_VS;
+defm PseudoVREDSUM : VPseudoVRED_VS;
+defm PseudoVREDAND : VPseudoVRED_VS;
+defm PseudoVREDOR : VPseudoVRED_VS;
+defm PseudoVREDXOR : VPseudoVRED_VS;
+defm PseudoVREDMINU : VPseudoVRED_VS;
+defm PseudoVREDMIN : VPseudoVRED_VS;
+defm PseudoVREDMAXU : VPseudoVRED_VS;
+defm PseudoVREDMAX : VPseudoVRED_VS;
 
 //===----------------------------------------------------------------------===//
 // 15.2. Vector Widening Integer Reduction Instructions
 //===----------------------------------------------------------------------===//
 let IsRVVWideningReduction = 1 in {
-defm PseudoVWREDSUMU : VPseudoReductionV_VS;
-defm PseudoVWREDSUM : VPseudoReductionV_VS;
+defm PseudoVWREDSUMU : VPseudoVWRED_VS;
+defm PseudoVWREDSUM : VPseudoVWRED_VS;
 }
 } // Predicates = [HasVInstructions]
 
@@ -3982,17 +4219,17 @@ let Predicates = [HasVInstructionsAnyF] in {
 //===----------------------------------------------------------------------===//
 // 15.3. Vector Single-Width Floating-Point Reduction Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVFREDOSUM : VPseudoReductionV_VS;
-defm PseudoVFREDUSUM : VPseudoReductionV_VS;
-defm PseudoVFREDMIN : VPseudoReductionV_VS;
-defm PseudoVFREDMAX : VPseudoReductionV_VS;
+defm PseudoVFREDOSUM : VPseudoVFREDO_VS;
+defm PseudoVFREDUSUM : VPseudoVFRED_VS;
+defm PseudoVFREDMIN : VPseudoVFRED_VS;
+defm PseudoVFREDMAX : VPseudoVFRED_VS;
 
 //===----------------------------------------------------------------------===//
 // 15.4. Vector Widening Floating-Point Reduction Instructions
 //===----------------------------------------------------------------------===//
 let IsRVVWideningReduction = 1 in {
-defm PseudoVFWREDUSUM : VPseudoReductionV_VS;
-defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
+defm PseudoVFWREDUSUM : VPseudoVFWRED_VS;
+defm PseudoVFWREDOSUM : VPseudoVFWRED_VS;
 }
 
 } // Predicates = [HasVInstructionsAnyF]
 
@@ -4005,55 +4242,57 @@ defm PseudoVFWREDOSUM : VPseudoReductionV_VS;
 // 16.1 Vector Mask-Register Logical Instructions
 //===----------------------------------------------------------------------===//
 
-defm PseudoVMAND: VPseudoBinaryM_MM;
-defm PseudoVMNAND: VPseudoBinaryM_MM;
-defm PseudoVMANDN: VPseudoBinaryM_MM;
-defm PseudoVMXOR: VPseudoBinaryM_MM;
-defm PseudoVMOR: VPseudoBinaryM_MM;
-defm PseudoVMNOR: VPseudoBinaryM_MM;
-defm PseudoVMORN: VPseudoBinaryM_MM;
-defm PseudoVMXNOR: VPseudoBinaryM_MM;
+defm PseudoVMAND: VPseudoVALU_MM;
+defm PseudoVMNAND: VPseudoVALU_MM;
+defm PseudoVMANDN: VPseudoVALU_MM;
+defm PseudoVMXOR: VPseudoVALU_MM;
+defm PseudoVMOR: VPseudoVALU_MM;
+defm PseudoVMNOR: VPseudoVALU_MM;
+defm PseudoVMORN: VPseudoVALU_MM;
+defm PseudoVMXNOR: VPseudoVALU_MM;
 
 // Pseudo instructions
-defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">;
-defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">;
+defm PseudoVMCLR : VPseudoNullaryPseudoM<"VMXOR">,
+                   Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
+defm PseudoVMSET : VPseudoNullaryPseudoM<"VMXNOR">,
+                   Sched<[WriteVMALUV, ReadVMALUV, ReadVMALUV]>;
 
 //===----------------------------------------------------------------------===//
 // 16.2. Vector mask population count vcpop
 //===----------------------------------------------------------------------===//
-defm PseudoVCPOP: VPseudoUnaryS_M;
+defm PseudoVCPOP: VPseudoVPOP_M;
 
 //===----------------------------------------------------------------------===//
 // 16.3. vfirst find-first-set mask bit
 //===----------------------------------------------------------------------===//
-defm PseudoVFIRST: VPseudoUnaryS_M;
+defm PseudoVFIRST: VPseudoV1ST_M;
 
 //===----------------------------------------------------------------------===//
 // 16.4. vmsbf.m set-before-first mask bit
 //===----------------------------------------------------------------------===//
-defm PseudoVMSBF: VPseudoUnaryM_M;
+defm PseudoVMSBF: VPseudoVSFS_M;
 
 //===----------------------------------------------------------------------===//
 // 16.5. vmsif.m set-including-first mask bit
 //===----------------------------------------------------------------------===//
-defm PseudoVMSIF: VPseudoUnaryM_M;
+defm PseudoVMSIF: VPseudoVSFS_M;
 
 //===----------------------------------------------------------------------===//
 // 16.6. vmsof.m set-only-first mask bit
 //===----------------------------------------------------------------------===//
-defm PseudoVMSOF: VPseudoUnaryM_M;
+defm PseudoVMSOF: VPseudoVSFS_M;
 
 //===----------------------------------------------------------------------===//
 // 16.8. Vector Iota Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVIOTA_M: VPseudoUnaryV_M;
+defm PseudoVIOTA_M: VPseudoVIOT_M;
 
 //===----------------------------------------------------------------------===//
 // 16.9. Vector Element Index Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVID : VPseudoMaskNullaryV;
+defm PseudoVID : VPseudoVID_V;
 
 //===----------------------------------------------------------------------===//
 // 17. Vector Permutation Instructions
@@ -4068,15 +4307,18 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
   foreach m = MxList.m in {
     let VLMul = m.value in {
       let HasSEWOp = 1, BaseInstr = VMV_X_S in
-      def PseudoVMV_X_S # "_" # m.MX: Pseudo<(outs GPR:$rd),
-                                             (ins m.vrclass:$rs2, ixlenimm:$sew),
-                                             []>, RISCVVPseudo;
+      def PseudoVMV_X_S # "_" # m.MX:
+        Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, ixlenimm:$sew), []>,
+        Sched<[WriteVIMovVX, ReadVIMovVX]>,
+        RISCVVPseudo;
       let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X,
          Constraints = "$rd = $rs1" in
      def PseudoVMV_S_X # "_" # m.MX: Pseudo<(outs m.vrclass:$rd),
                                             (ins m.vrclass:$rs1, GPR:$rs2,
                                                  AVL:$vl, ixlenimm:$sew),
-                                            []>, RISCVVPseudo;
+                                            []>,
+                                     Sched<[WriteVIMovXV, ReadVIMovXV, ReadVIMovXX]>,
+                                     RISCVVPseudo;
     }
   }
 }
@@ -4093,17 +4335,19 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
     let VLMul = m.value in {
      let HasSEWOp = 1, BaseInstr = VFMV_F_S in
      def "PseudoVFMV_" # f.FX # "_S_" # m.MX :
-        Pseudo<(outs f.fprclass:$rd),
-               (ins m.vrclass:$rs2,
-                    ixlenimm:$sew),
-               []>, RISCVVPseudo;
+        Pseudo<(outs f.fprclass:$rd),
+               (ins m.vrclass:$rs2, ixlenimm:$sew), []>,
+        Sched<[WriteVFMovVF, ReadVFMovVF]>,
+        RISCVVPseudo;
      let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F,
          Constraints = "$rd = $rs1" in
      def "PseudoVFMV_S_" # f.FX # "_" # m.MX :
                                             Pseudo<(outs m.vrclass:$rd),
                                                    (ins m.vrclass:$rs1, f.fprclass:$rs2,
                                                         AVL:$vl, ixlenimm:$sew),
-                                                   []>, RISCVVPseudo;
+                                                   []>,
+                                            Sched<[WriteVFMovFV, ReadVFMovFV, ReadVFMovFX]>,
+                                            RISCVVPseudo;
     }
  }
 }
@@ -4114,52 +4358,33 @@ let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
 // 17.3. Vector Slide Instructions
 //===----------------------------------------------------------------------===//
 let Predicates = [HasVInstructions] in {
-  defm PseudoVSLIDEUP : VPseudoTernaryV_VX_VI<uimm5, "@earlyclobber $rd">;
-  defm PseudoVSLIDEDOWN : VPseudoTernaryV_VX_VI<uimm5>;
-  defm PseudoVSLIDE1UP : VPseudoBinaryV_VX<"@earlyclobber $rd">;
-  defm PseudoVSLIDE1DOWN : VPseudoBinaryV_VX;
+  defm PseudoVSLIDEUP : VPseudoVSLD_VX_VI<uimm5, "@earlyclobber $rd">;
+  defm PseudoVSLIDEDOWN : VPseudoVSLD_VX_VI<uimm5>;
+  defm PseudoVSLIDE1UP : VPseudoVSLD1_VX<"@earlyclobber $rd">;
+  defm PseudoVSLIDE1DOWN : VPseudoVSLD1_VX;
 } // Predicates = [HasVInstructions]
 
 let Predicates = [HasVInstructionsAnyF] in {
-  defm PseudoVFSLIDE1UP : VPseudoBinaryV_VF<"@earlyclobber $rd">;
-  defm PseudoVFSLIDE1DOWN : VPseudoBinaryV_VF;
+  defm PseudoVFSLIDE1UP : VPseudoVSLD1_VF<"@earlyclobber $rd">;
+  defm PseudoVFSLIDE1DOWN : VPseudoVSLD1_VF;
 } // Predicates = [HasVInstructionsAnyF]
 
 //===----------------------------------------------------------------------===//
 // 17.4. Vector Register Gather Instructions
 //===----------------------------------------------------------------------===//
-defm PseudoVRGATHER : VPseudoBinaryV_VV_VX_VI<uimm5, "@earlyclobber $rd">;
-defm PseudoVRGATHEREI16 : VPseudoBinaryV_VV_EEW</* eew */ 16, "@earlyclobber $rd">;
+defm PseudoVRGATHER : VPseudoVGTR_VV_VX_VI<uimm5, "@earlyclobber $rd">;
+defm PseudoVRGATHEREI16 : VPseudoVGTR_VV_EEW</* eew */ 16, "@earlyclobber $rd">;
 
 //===----------------------------------------------------------------------===//
 // 17.5. Vector Compress Instruction
 //===----------------------------------------------------------------------===//
-defm PseudoVCOMPRESS : VPseudoUnaryV_V_AnyMask;
+defm PseudoVCOMPRESS : VPseudoVCPR_V;
 
 //===----------------------------------------------------------------------===//
 // Patterns.
 //===----------------------------------------------------------------------===//
 
 //===----------------------------------------------------------------------===//
-// 8. Vector AMO Operations
-//===----------------------------------------------------------------------===//
-let Predicates = [HasStdExtZvamo] in {
-  defm : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllIntegerVectors>;
-  defm : VPatAMOV_WD<"int_riscv_vamoadd", "PseudoVAMOADD", AllIntegerVectors>;
-  defm : VPatAMOV_WD<"int_riscv_vamoxor", "PseudoVAMOXOR", AllIntegerVectors>;
-  defm : VPatAMOV_WD<"int_riscv_vamoand", "PseudoVAMOAND", AllIntegerVectors>;
-  defm : VPatAMOV_WD<"int_riscv_vamoor", "PseudoVAMOOR", AllIntegerVectors>;
-  defm : VPatAMOV_WD<"int_riscv_vamomin", "PseudoVAMOMIN", AllIntegerVectors>;
-  defm : VPatAMOV_WD<"int_riscv_vamomax", "PseudoVAMOMAX", AllIntegerVectors>;
-  defm : VPatAMOV_WD<"int_riscv_vamominu", "PseudoVAMOMINU", AllIntegerVectors>;
-  defm : VPatAMOV_WD<"int_riscv_vamomaxu", "PseudoVAMOMAXU", AllIntegerVectors>;
-} // Predicates = [HasStdExtZvamo]
-
-let Predicates = [HasStdExtZvamo, HasVInstructionsAnyF] in {
-  defm : VPatAMOV_WD<"int_riscv_vamoswap", "PseudoVAMOSWAP", AllFloatVectors>;
-} // Predicates = [HasStdExtZvamo, HasVInstructionsAnyF]
-
-//===----------------------------------------------------------------------===//
 // 12. Vector Integer Arithmetic Instructions
 //===----------------------------------------------------------------------===//
 
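The RISCVInstrInfoZb.td changes that follow are almost entirely mechanical: each inline `RISCVOpcode<0b0010011>` argument becomes the shared named record `OPC_OP_IMM` (and `0b0011011` becomes `OPC_OP_IMM_32`), so each encoding lives in exactly one place. The shape of that cleanup, reduced to a sketch whose class is simplified relative to the real `RISCVOpcode`:

```
class RVOpcode<bits<7> val> {
  bits<7> Value = val;
}

// One shared def per major opcode, instead of a literal at every use site:
def OPC_OP_IMM    : RVOpcode<0b0010011>;
def OPC_OP_IMM_32 : RVOpcode<0b0011011>;
```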
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 461bdd348934..7eb8ae7d4193 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -382,50 +382,50 @@ def FSRI : RVBTernaryImm6<0b101, OPC_OP_IMM, "fsri",
 } // Predicates = [HasStdExtZbt]
 
 let Predicates = [HasStdExtZbb] in {
-def CLZ  : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0010011>, "clz">,
+def CLZ  : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM, "clz">,
           Sched<[WriteCLZ, ReadCLZ]>;
-def CTZ  : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0010011>, "ctz">,
+def CTZ  : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM, "ctz">,
           Sched<[WriteCTZ, ReadCTZ]>;
-def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0010011>, "cpop">,
+def CPOP : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM, "cpop">,
           Sched<[WriteCPOP, ReadCPOP]>;
 } // Predicates = [HasStdExtZbb]
 
 let Predicates = [HasStdExtZbm, IsRV64] in
-def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, RISCVOpcode<0b0010011>,
-                        "bmatflip">, Sched<[]>;
+def BMATFLIP : RVBUnary<0b0110000, 0b00011, 0b001, OPC_OP_IMM, "bmatflip">,
+               Sched<[]>;
 
 let Predicates = [HasStdExtZbb] in {
-def SEXTB : RVBUnary<0b0110000, 0b00100, 0b001, RISCVOpcode<0b0010011>,
-                     "sext.b">, Sched<[WriteIALU, ReadIALU]>;
-def SEXTH : RVBUnary<0b0110000, 0b00101, 0b001, RISCVOpcode<0b0010011>,
-                     "sext.h">, Sched<[WriteIALU, ReadIALU]>;
+def SEXTB : RVBUnary<0b0110000, 0b00100, 0b001, OPC_OP_IMM, "sext.b">,
+            Sched<[WriteIALU, ReadIALU]>;
+def SEXTH : RVBUnary<0b0110000, 0b00101, 0b001, OPC_OP_IMM, "sext.h">,
+            Sched<[WriteIALU, ReadIALU]>;
 } // Predicates = [HasStdExtZbb]
 
 let Predicates = [HasStdExtZbr] in {
-def CRC32B : RVBUnary<0b0110000, 0b10000, 0b001, RISCVOpcode<0b0010011>,
-                      "crc32.b">, Sched<[]>;
-def CRC32H : RVBUnary<0b0110000, 0b10001, 0b001, RISCVOpcode<0b0010011>,
-                      "crc32.h">, Sched<[]>;
-def CRC32W : RVBUnary<0b0110000, 0b10010, 0b001, RISCVOpcode<0b0010011>,
-                      "crc32.w">, Sched<[]>;
+def CRC32B : RVBUnary<0b0110000, 0b10000, 0b001, OPC_OP_IMM, "crc32.b">,
+             Sched<[]>;
+def CRC32H : RVBUnary<0b0110000, 0b10001, 0b001, OPC_OP_IMM, "crc32.h">,
+             Sched<[]>;
+def CRC32W : RVBUnary<0b0110000, 0b10010, 0b001, OPC_OP_IMM, "crc32.w">,
+             Sched<[]>;
 } // Predicates = [HasStdExtZbr]
 
 let Predicates = [HasStdExtZbr, IsRV64] in
-def CRC32D : RVBUnary<0b0110000, 0b10011, 0b001, RISCVOpcode<0b0010011>,
-                      "crc32.d">, Sched<[]>;
+def CRC32D : RVBUnary<0b0110000, 0b10011, 0b001, OPC_OP_IMM, "crc32.d">,
+             Sched<[]>;
 
 let Predicates = [HasStdExtZbr] in {
-def CRC32CB : RVBUnary<0b0110000, 0b11000, 0b001, RISCVOpcode<0b0010011>,
-                       "crc32c.b">, Sched<[]>;
-def CRC32CH : RVBUnary<0b0110000, 0b11001, 0b001, RISCVOpcode<0b0010011>,
-                       "crc32c.h">, Sched<[]>;
-def CRC32CW : RVBUnary<0b0110000, 0b11010, 0b001, RISCVOpcode<0b0010011>,
-                       "crc32c.w">, Sched<[]>;
+def CRC32CB : RVBUnary<0b0110000, 0b11000, 0b001, OPC_OP_IMM, "crc32c.b">,
+              Sched<[]>;
+def CRC32CH : RVBUnary<0b0110000, 0b11001, 0b001, OPC_OP_IMM, "crc32c.h">,
+              Sched<[]>;
+def CRC32CW : RVBUnary<0b0110000, 0b11010, 0b001, OPC_OP_IMM, "crc32c.w">,
+              Sched<[]>;
 } // Predicates = [HasStdExtZbr]
 
 let Predicates = [HasStdExtZbr, IsRV64] in
-def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, RISCVOpcode<0b0010011>,
-                       "crc32c.d">, Sched<[]>;
+def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
+              Sched<[]>;
 
 let Predicates = [HasStdExtZbc] in {
 def CLMUL  : ALU_rr<0b0000101, 0b001, "clmul">, Sched<[]>;
@@ -523,12 +523,12 @@ def FSRIW : RVBTernaryImm5<0b10, 0b101, OPC_OP_IMM_32,
 } // Predicates = [HasStdExtZbt, IsRV64]
 
 let Predicates = [HasStdExtZbb, IsRV64] in {
-def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, RISCVOpcode<0b0011011>,
-                    "clzw">, Sched<[WriteCLZ32, ReadCLZ32]>;
-def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, RISCVOpcode<0b0011011>,
-                    "ctzw">, Sched<[WriteCTZ32, ReadCTZ32]>;
-def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, RISCVOpcode<0b0011011>,
-                     "cpopw">, Sched<[WriteCPOP32, ReadCPOP32]>;
+def CLZW : RVBUnary<0b0110000, 0b00000, 0b001, OPC_OP_IMM_32, "clzw">,
+           Sched<[WriteCLZ32, ReadCLZ32]>;
+def CTZW : RVBUnary<0b0110000, 0b00001, 0b001, OPC_OP_IMM_32, "ctzw">,
+           Sched<[WriteCTZ32, ReadCTZ32]>;
+def CPOPW : RVBUnary<0b0110000, 0b00010, 0b001, OPC_OP_IMM_32, "cpopw">,
+            Sched<[WriteCPOP32, ReadCPOP32]>;
 } // Predicates = [HasStdExtZbb, IsRV64]
 
 let Predicates = [HasStdExtZbp, IsRV64] in {
@@ -791,6 +791,9 @@ def : Pat<(xor GPR:$rs1, BSETINVMask:$mask),
 def : Pat<(and (srl GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1)),
           (BEXTI GPR:$rs1, uimmlog2xlen:$shamt)>;
 
+def : Pat<(and (not (srl GPR:$rs1, uimmlog2xlen:$shamt)), (XLenVT 1)),
+          (XORI (BEXTI GPR:$rs1, uimmlog2xlen:$shamt), (XLenVT 1))>;
+
 def : Pat<(or GPR:$r, BSETINVTwoBitsMask:$i),
           (BSETI (BSETI GPR:$r, (TrailingZerosXForm BSETINVTwoBitsMask:$i)),
                  (BSETINVTwoBitsMaskHigh BSETINVTwoBitsMask:$i))>;
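The one functional addition in the Zb hunk above is the new pattern mapping `(and (not (srl x, sh)), 1)` onto `BEXTI` followed by `XORI`: extracting bit `sh` and flipping it with an immediate xor yields the complement of that bit. A quick check of the identity on one sample value, written as standalone TableGen arithmetic with arbitrarily chosen operands:

```
defvar X  = 0b1011;
defvar SH = 2;

// (~(x >> sh)) & 1 -- the DAG the new pattern matches.
defvar Lhs = !and(!xor(!srl(X, SH), -1), 1);
// (bexti x, sh) ^ 1 -- what XORI(BEXTI ...) computes.
defvar Rhs = !xor(!and(!srl(X, SH), 1), 1);

def BextiCheck {
  int lhs = Lhs;  // 1
  int rhs = Rhs;  // 1
}
```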
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
index a33494461869..663e44813899 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZfh.td
@@ -28,41 +28,6 @@ def riscv_fmv_x_anyexth
     : SDNode<"RISCVISD::FMV_X_ANYEXTH", SDT_RISCVFMV_X_ANYEXTH>;
 
 //===----------------------------------------------------------------------===//
-// Instruction class templates
-//===----------------------------------------------------------------------===//
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPFMAH_rrr_frm<RISCVOpcode opcode, string opcodestr>
-    : RVInstR4Frm<0b10, opcode, (outs FPR16:$rd),
-                  (ins FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, frmarg:$funct3),
-                  opcodestr, "$rd, $rs1, $rs2, $rs3, $funct3">;
-
-class FPFMAHDynFrmAlias<FPFMAH_rrr_frm Inst, string OpcodeStr>
-    : InstAlias<OpcodeStr#" $rd, $rs1, $rs2, $rs3",
-                (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPALUH_rr<bits<7> funct7, bits<3> funct3, string opcodestr>
-    : RVInstR<funct7, funct3, OPC_OP_FP, (outs FPR16:$rd),
-              (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPALUH_rr_frm<bits<7> funct7, string opcodestr>
-    : RVInstRFrm<funct7, OPC_OP_FP, (outs FPR16:$rd),
-                 (ins FPR16:$rs1, FPR16:$rs2, frmarg:$funct3), opcodestr,
-                 "$rd, $rs1, $rs2, $funct3">;
-
-class FPALUHDynFrmAlias<FPALUH_rr_frm Inst, string OpcodeStr>
-    : InstAlias<OpcodeStr#" $rd, $rs1, $rs2",
-                (Inst FPR16:$rd, FPR16:$rs1, FPR16:$rs2, 0b111)>;
-
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-class FPCmpH_rr<bits<3> funct3, string opcodestr>
-    : RVInstR<0b1010010, funct3, OPC_OP_FP, (outs GPR:$rd),
-              (ins FPR16:$rs1, FPR16:$rs2), opcodestr, "$rd, $rs1, $rs2">,
-      Sched<[WriteFCmp16, ReadFCmp16, ReadFCmp16]>;
-
-//===----------------------------------------------------------------------===//
 // Instructions
 //===----------------------------------------------------------------------===//
 
@@ -84,145 +49,120 @@ def FSH : RVInstS<0b001, OPC_STORE_FP, (outs),
 } // Predicates = [HasStdExtZfhmin]
 
 let Predicates = [HasStdExtZfh] in {
-def FMADD_H  : FPFMAH_rrr_frm<OPC_MADD, "fmadd.h">,
-               Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
-def          : FPFMAHDynFrmAlias<FMADD_H, "fmadd.h">;
-def FMSUB_H  : FPFMAH_rrr_frm<OPC_MSUB, "fmsub.h">,
-               Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
-def          : FPFMAHDynFrmAlias<FMSUB_H, "fmsub.h">;
-def FNMSUB_H : FPFMAH_rrr_frm<OPC_NMSUB, "fnmsub.h">,
-               Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
-def          : FPFMAHDynFrmAlias<FNMSUB_H, "fnmsub.h">;
-def FNMADD_H : FPFMAH_rrr_frm<OPC_NMADD, "fnmadd.h">,
-               Sched<[WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16]>;
-def          : FPFMAHDynFrmAlias<FNMADD_H, "fnmadd.h">;
-
-def FADD_H : FPALUH_rr_frm<0b0000010, "fadd.h">,
+let SchedRW = [WriteFMA16, ReadFMA16, ReadFMA16, ReadFMA16] in {
+def FMADD_H  : FPFMA_rrr_frm<OPC_MADD, 0b10, "fmadd.h", FPR16>;
+def FMSUB_H  : FPFMA_rrr_frm<OPC_MSUB, 0b10, "fmsub.h", FPR16>;
+def FNMSUB_H : FPFMA_rrr_frm<OPC_NMSUB, 0b10, "fnmsub.h", FPR16>;
+def FNMADD_H : FPFMA_rrr_frm<OPC_NMADD, 0b10, "fnmadd.h", FPR16>;
+}
+
+def : FPFMADynFrmAlias<FMADD_H, "fmadd.h", FPR16>;
+def : FPFMADynFrmAlias<FMSUB_H, "fmsub.h", FPR16>;
+def : FPFMADynFrmAlias<FNMSUB_H, "fnmsub.h", FPR16>;
+def : FPFMADynFrmAlias<FNMADD_H, "fnmadd.h", FPR16>;
+
+def FADD_H : FPALU_rr_frm<0b0000010, "fadd.h", FPR16>,
             Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>;
-def : FPALUHDynFrmAlias<FADD_H, "fadd.h">;
-def FSUB_H : FPALUH_rr_frm<0b0000110, "fsub.h">,
+def FSUB_H : FPALU_rr_frm<0b0000110, "fsub.h", FPR16>,
             Sched<[WriteFALU16, ReadFALU16, ReadFALU16]>;
-def : FPALUHDynFrmAlias<FSUB_H, "fsub.h">;
-def FMUL_H : FPALUH_rr_frm<0b0001010, "fmul.h">,
+def FMUL_H : FPALU_rr_frm<0b0001010, "fmul.h", FPR16>,
             Sched<[WriteFMul16, ReadFMul16, ReadFMul16]>;
-def : FPALUHDynFrmAlias<FMUL_H, "fmul.h">;
-def FDIV_H : FPALUH_rr_frm<0b0001110, "fdiv.h">,
+def FDIV_H : FPALU_rr_frm<0b0001110, "fdiv.h", FPR16>,
             Sched<[WriteFDiv16, ReadFDiv16, ReadFDiv16]>;
-def : FPALUHDynFrmAlias<FDIV_H, "fdiv.h">;
 
-def FSQRT_H : FPUnaryOp_r_frm<0b0101110, FPR16, FPR16, "fsqrt.h">,
-              Sched<[WriteFSqrt16, ReadFSqrt16]> {
-  let rs2 = 0b00000;
-}
+def : FPALUDynFrmAlias<FADD_H, "fadd.h", FPR16>;
+def : FPALUDynFrmAlias<FSUB_H, "fsub.h", FPR16>;
+def : FPALUDynFrmAlias<FMUL_H, "fmul.h", FPR16>;
+def : FPALUDynFrmAlias<FDIV_H, "fdiv.h", FPR16>;
+
+def FSQRT_H : FPUnaryOp_r_frm<0b0101110, 0b00000, FPR16, FPR16, "fsqrt.h">,
              Sched<[WriteFSqrt16, ReadFSqrt16]>;
 def : FPUnaryOpDynFrmAlias<FSQRT_H, "fsqrt.h", FPR16, FPR16>;
 
-def FSGNJ_H  : FPALUH_rr<0b0010010, 0b000, "fsgnj.h">,
-               Sched<[WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16]>;
-def FSGNJN_H : FPALUH_rr<0b0010010, 0b001, "fsgnjn.h">,
-               Sched<[WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16]>;
-def FSGNJX_H : FPALUH_rr<0b0010010, 0b010, "fsgnjx.h">,
-               Sched<[WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16]>;
-def FMIN_H   : FPALUH_rr<0b0010110, 0b000, "fmin.h">,
-               Sched<[WriteFMinMax16, ReadFMinMax16, ReadFMinMax16]>;
-def FMAX_H   : FPALUH_rr<0b0010110, 0b001, "fmax.h">,
-               Sched<[WriteFMinMax16, ReadFMinMax16, ReadFMinMax16]>;
-
-def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.w.h">,
-               Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]> {
-  let rs2 = 0b00000;
+let SchedRW = [WriteFSGNJ16, ReadFSGNJ16, ReadFSGNJ16],
+    mayRaiseFPException = 0 in {
+def FSGNJ_H  : FPALU_rr<0b0010010, 0b000, "fsgnj.h", FPR16>;
+def FSGNJN_H : FPALU_rr<0b0010010, 0b001, "fsgnjn.h", FPR16>;
+def FSGNJX_H : FPALU_rr<0b0010010, 0b010, "fsgnjx.h", FPR16>;
 }
-def : FPUnaryOpDynFrmAlias<FCVT_W_H, "fcvt.w.h", GPR, FPR16>;
-def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.wu.h">,
-                Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]> {
-  let rs2 = 0b00001;
+let SchedRW = [WriteFMinMax16, ReadFMinMax16, ReadFMinMax16] in {
+def FMIN_H   : FPALU_rr<0b0010110, 0b000, "fmin.h", FPR16>;
+def FMAX_H   : FPALU_rr<0b0010110, 0b001, "fmax.h", FPR16>;
 }
+
+def FCVT_W_H : FPUnaryOp_r_frm<0b1100010, 0b00000, GPR, FPR16, "fcvt.w.h">,
+               Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
+def : FPUnaryOpDynFrmAlias<FCVT_W_H, "fcvt.w.h", GPR, FPR16>;
+
+def FCVT_WU_H : FPUnaryOp_r_frm<0b1100010, 0b00001, GPR, FPR16, "fcvt.wu.h">,
+                Sched<[WriteFCvtF16ToI32, ReadFCvtF16ToI32]>;
 def : FPUnaryOpDynFrmAlias<FCVT_WU_H, "fcvt.wu.h", GPR, FPR16>;
 
-def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.w">,
-               Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]> {
-  let rs2 = 0b00000;
-}
+def FCVT_H_W : FPUnaryOp_r_frm<0b1101010, 0b00000, FPR16, GPR, "fcvt.h.w">,
+               Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
 def : FPUnaryOpDynFrmAlias<FCVT_H_W, "fcvt.h.w", FPR16, GPR>;
 
-def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.wu">,
-                Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]> {
-  let rs2 = 0b00001;
-}
+def FCVT_H_WU : FPUnaryOp_r_frm<0b1101010, 0b00001, FPR16, GPR, "fcvt.h.wu">,
+                Sched<[WriteFCvtI32ToF16, ReadFCvtI32ToF16]>;
 def : FPUnaryOpDynFrmAlias<FCVT_H_WU, "fcvt.h.wu", FPR16, GPR>;
 } // Predicates = [HasStdExtZfh]
 
 let Predicates = [HasStdExtZfhmin] in {
-def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, FPR16, FPR32, "fcvt.h.s">,
-               Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]> {
-  let rs2 = 0b00000;
-}
+def FCVT_H_S : FPUnaryOp_r_frm<0b0100010, 0b00000, FPR16, FPR32, "fcvt.h.s">,
+               Sched<[WriteFCvtF32ToF16, ReadFCvtF32ToF16]>;
 def : FPUnaryOpDynFrmAlias<FCVT_H_S, "fcvt.h.s", FPR16, FPR32>;
 
-def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b000, FPR32, FPR16, "fcvt.s.h">,
-               Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]> {
-  let rs2 = 0b00010;
-}
+def FCVT_S_H : FPUnaryOp_r<0b0100000, 0b00010, 0b000, FPR32, FPR16, "fcvt.s.h">,
+               Sched<[WriteFCvtF16ToF32, ReadFCvtF16ToF32]>;
 
-def FMV_X_H : FPUnaryOp_r<0b1110010, 0b000, GPR, FPR16, "fmv.x.h">,
-              Sched<[WriteFMovF16ToI16, ReadFMovF16ToI16]> {
-  let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FMV_X_H : FPUnaryOp_r<0b1110010, 0b00000, 0b000, GPR, FPR16, "fmv.x.h">,
+              Sched<[WriteFMovF16ToI16, ReadFMovF16ToI16]>;
 
-def FMV_H_X : FPUnaryOp_r<0b1111010, 0b000, FPR16, GPR, "fmv.h.x">,
-              Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]> {
-  let rs2 = 0b00000;
-}
+let mayRaiseFPException = 0 in
+def FMV_H_X : FPUnaryOp_r<0b1111010, 0b00000, 0b000, FPR16, GPR, "fmv.h.x">,
+              Sched<[WriteFMovI16ToF16, ReadFMovI16ToF16]>;
 } // Predicates = [HasStdExtZfhmin]
 
 let Predicates = [HasStdExtZfh] in {
-def FEQ_H : FPCmpH_rr<0b010, "feq.h">;
-def FLT_H : FPCmpH_rr<0b001, "flt.h">;
-def FLE_H : FPCmpH_rr<0b000, "fle.h">;
-def FCLASS_H : FPUnaryOp_r<0b1110010, 0b001, GPR, FPR16, "fclass.h">,
-               Sched<[WriteFClass16, ReadFClass16]> {
-  let rs2 = 0b00000;
+let SchedRW = [WriteFCmp16, ReadFCmp16, ReadFCmp16] in {
+def FEQ_H : FPCmp_rr<0b1010010, 0b010, "feq.h", FPR16>;
+def FLT_H : FPCmp_rr<0b1010010, 0b001, "flt.h", FPR16>;
+def FLE_H : FPCmp_rr<0b1010010, 0b000, "fle.h", FPR16>;
 }
+
+let mayRaiseFPException = 0 in
+def FCLASS_H : FPUnaryOp_r<0b1110010, 0b00000, 0b001, GPR, FPR16, "fclass.h">,
+               Sched<[WriteFClass16, ReadFClass16]>;
 } // Predicates = [HasStdExtZfh]
 
 let Predicates = [HasStdExtZfh, IsRV64] in {
-def FCVT_L_H  : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.l.h">,
-                Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]> {
-  let rs2 = 0b00010;
-}
+def FCVT_L_H  : FPUnaryOp_r_frm<0b1100010, 0b00010, GPR, FPR16, "fcvt.l.h">,
+                Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
 def : FPUnaryOpDynFrmAlias<FCVT_L_H, "fcvt.l.h", GPR, FPR16>;
 
-def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, GPR, FPR16, "fcvt.lu.h">,
-                Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]> {
-  let rs2 = 0b00011;
-}
+def FCVT_LU_H : FPUnaryOp_r_frm<0b1100010, 0b00011, GPR, FPR16, "fcvt.lu.h">,
                Sched<[WriteFCvtF16ToI64, ReadFCvtF16ToI64]>;
 def : FPUnaryOpDynFrmAlias<FCVT_LU_H, "fcvt.lu.h", GPR, FPR16>;
 
-def FCVT_H_L  : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.l">,
-                Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]> {
-  let rs2 = 0b00010;
-}
+def FCVT_H_L  : FPUnaryOp_r_frm<0b1101010, 0b00010, FPR16, GPR, "fcvt.h.l">,
+                Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
 def : FPUnaryOpDynFrmAlias<FCVT_H_L, "fcvt.h.l", FPR16, GPR>;
 
-def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, FPR16, GPR, "fcvt.h.lu">,
-                Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]> {
-  let rs2 = 0b00011;
-}
+def FCVT_H_LU : FPUnaryOp_r_frm<0b1101010, 0b00011, FPR16, GPR, "fcvt.h.lu">,
+                Sched<[WriteFCvtI64ToF16, ReadFCvtI64ToF16]>;
 def : FPUnaryOpDynFrmAlias<FCVT_H_LU, "fcvt.h.lu", FPR16, GPR>;
 } // Predicates = [HasStdExtZfh, IsRV64]
 
 let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
-def FCVT_H_D  : FPUnaryOp_r_frm<0b0100010, FPR16, FPR64, "fcvt.h.d">,
-                Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]> {
-  let rs2 = 0b00001;
-}
+def FCVT_H_D  : FPUnaryOp_r_frm<0b0100010, 0b00001, FPR16, FPR64, "fcvt.h.d">,
                Sched<[WriteFCvtF64ToF16, ReadFCvtF64ToF16]>;
 def : FPUnaryOpDynFrmAlias<FCVT_H_D, "fcvt.h.d", FPR16, FPR64>;
 
-def FCVT_D_H  : FPUnaryOp_r<0b0100001, 0b000, FPR64, FPR16, "fcvt.d.h">,
-                Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]> {
-  let rs2 = 0b00010;
-}
+def FCVT_D_H  : FPUnaryOp_r<0b0100001, 0b00010, 0b000, FPR64, FPR16, "fcvt.d.h">,
+                Sched<[WriteFCvtF16ToF64, ReadFCvtF16ToF64]>;
 } // Predicates = [HasStdExtZfhmin, HasStdExtD]
 
 //===----------------------------------------------------------------------===//
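For reference while reading the codegen patterns that follow, the 3-bit frm operand values they pass (0b111, 0b001, 0b100) are the standard RISC-V rounding-mode encodings. A sketch under assumed names (not part of the patch):

// RISC-V floating-point rounding modes as encoded in an instruction's frm
// field. The patterns below use 0b001 (RTZ) for conversions that must
// truncate and 0b111 (DYN) to defer to the dynamic frm CSR.
enum RoundingMode {
  RNE = 0b000, // round to nearest, ties to even
  RTZ = 0b001, // round towards zero
  RDN = 0b010, // round down (towards -infinity)
  RUP = 0b011, // round up (towards +infinity)
  RMM = 0b100, // round to nearest, ties to max magnitude
  DYN = 0b111  // use the rounding mode held in the frm CSR
};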
@@ -275,12 +215,12 @@ def : Pat<(f16 (fpimm0)), (FMV_H_X X0)>;
 
 /// Float arithmetic operations
 
-def : PatFpr16Fpr16DynFrm<fadd, FADD_H>;
-def : PatFpr16Fpr16DynFrm<fsub, FSUB_H>;
-def : PatFpr16Fpr16DynFrm<fmul, FMUL_H>;
-def : PatFpr16Fpr16DynFrm<fdiv, FDIV_H>;
+def : PatFpr16Fpr16DynFrm<any_fadd, FADD_H>;
+def : PatFpr16Fpr16DynFrm<any_fsub, FSUB_H>;
+def : PatFpr16Fpr16DynFrm<any_fmul, FMUL_H>;
+def : PatFpr16Fpr16DynFrm<any_fdiv, FDIV_H>;
 
-def : Pat<(fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>;
+def : Pat<(any_fsqrt FPR16:$rs1), (FSQRT_H FPR16:$rs1, 0b111)>;
 
 def : Pat<(fneg FPR16:$rs1), (FSGNJN_H $rs1, $rs1)>;
 def : Pat<(fabs FPR16:$rs1), (FSGNJX_H $rs1, $rs1)>;
@@ -292,19 +232,19 @@ def : Pat<(fcopysign FPR16:$rs1, FPR32:$rs2),
 def : Pat<(fcopysign FPR32:$rs1, FPR16:$rs2), (FSGNJ_S $rs1, (FCVT_S_H $rs2))>;
 
 // fmadd: rs1 * rs2 + rs3
-def : Pat<(fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3),
+def : Pat<(any_fma FPR16:$rs1, FPR16:$rs2, FPR16:$rs3),
           (FMADD_H $rs1, $rs2, $rs3, 0b111)>;
 
 // fmsub: rs1 * rs2 - rs3
-def : Pat<(fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3)),
+def : Pat<(any_fma FPR16:$rs1, FPR16:$rs2, (fneg FPR16:$rs3)),
           (FMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
 
 // fnmsub: -rs1 * rs2 + rs3
-def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3),
+def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, FPR16:$rs3),
           (FNMSUB_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
 
 // fnmadd: -rs1 * rs2 - rs3
-def : Pat<(fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
+def : Pat<(any_fma (fneg FPR16:$rs1), FPR16:$rs2, (fneg FPR16:$rs3)),
           (FNMADD_H FPR16:$rs1, FPR16:$rs2, FPR16:$rs3, 0b111)>;
 
 // The ratified 20191213 ISA spec defines fmin and fmax in a way that matches
@@ -337,8 +277,8 @@ defm : StPat<store, FSH, FPR16, f16>;
 
 /// Float conversion operations
 
 // f32 -> f16, f16 -> f32
-def : Pat<(fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>;
-def : Pat<(fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
+def : Pat<(any_fpround FPR32:$rs1), (FCVT_H_S FPR32:$rs1, 0b111)>;
+def : Pat<(any_fpextend FPR16:$rs1), (FCVT_S_H FPR16:$rs1)>;
 
 // Moves (no conversion)
 def : Pat<(riscv_fmv_h_x GPR:$src), (FMV_H_X GPR:$src)>;
@@ -347,8 +287,8 @@ def : Pat<(riscv_fmv_x_anyexth FPR16:$src), (FMV_X_H FPR16:$src)>;
 
 let Predicates = [HasStdExtZfh, IsRV32] in {
 // half->[u]int. Round-to-zero must be used.
-def : Pat<(i32 (fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(i32 (fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_sint FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(i32 (any_fp_to_uint FPR16:$rs1)), (FCVT_WU_H $rs1, 0b001)>;
 
 // Saturating float->[u]int32.
 def : Pat<(i32 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_W_H $rs1, 0b001)>;
@@ -361,20 +301,20 @@ def : Pat<(i32 (lrint FPR16:$rs1)), (FCVT_W_H $rs1, 0b111)>;
 def : Pat<(i32 (lround FPR16:$rs1)), (FCVT_W_H $rs1, 0b100)>;
 
 // [u]int->half. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i32 GPR:$rs1)), (FCVT_H_W $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i32 GPR:$rs1)), (FCVT_H_WU $rs1, 0b111)>;
 } // Predicates = [HasStdExtZfh, IsRV32]
 
 let Predicates = [HasStdExtZfh, IsRV64] in {
 // Use target specific isd nodes to help us remember the result is sign
 // extended. Matching sext_inreg+fptoui/fptosi may cause the conversion to be
 // duplicated if it has another user that didn't need the sign_extend.
-def : Pat<(riscv_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
-def : Pat<(riscv_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_w_rtz_rv64 FPR16:$rs1), (FCVT_W_H $rs1, 0b001)>;
+def : Pat<(riscv_any_fcvt_wu_rtz_rv64 FPR16:$rs1), (FCVT_WU_H $rs1, 0b001)>;
 
 // half->[u]int64. Round-to-zero must be used.
-def : Pat<(i64 (fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
-def : Pat<(i64 (fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_sint FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
+def : Pat<(i64 (any_fp_to_uint FPR16:$rs1)), (FCVT_LU_H $rs1, 0b001)>;
 
 // Saturating float->[u]int64.
 def : Pat<(i64 (riscv_fcvt_x_rtz FPR16:$rs1)), (FCVT_L_H $rs1, 0b001)>;
@@ -389,17 +329,17 @@ def : Pat<(i64 (lround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
 def : Pat<(i64 (llround FPR16:$rs1)), (FCVT_L_H $rs1, 0b100)>;
 
 // [u]int->fp. Match GCC and default to using dynamic rounding mode.
-def : Pat<(sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>;
-def : Pat<(sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
-def : Pat<(uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 (sexti32 (i64 GPR:$rs1)))), (FCVT_H_W $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i64 (zexti32 (i64 GPR:$rs1)))), (FCVT_H_WU $rs1, 0b111)>;
+def : Pat<(any_sint_to_fp (i64 GPR:$rs1)), (FCVT_H_L $rs1, 0b111)>;
+def : Pat<(any_uint_to_fp (i64 GPR:$rs1)), (FCVT_H_LU $rs1, 0b111)>;
 } // Predicates = [HasStdExtZfh, IsRV64]
 
 let Predicates = [HasStdExtZfhmin, HasStdExtD] in {
 /// Float conversion operations
 
 // f64 -> f16, f16 -> f64
-def : Pat<(fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
-def : Pat<(fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
+def : Pat<(any_fpround FPR64:$rs1), (FCVT_H_D FPR64:$rs1, 0b111)>;
+def : Pat<(any_fpextend FPR16:$rs1), (FCVT_D_H FPR16:$rs1)>;
 
 /// Float arithmetic operations
 def : Pat<(fcopysign FPR16:$rs1, FPR64:$rs2),
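The fadd-to-any_fadd style rewrites throughout this file let a single pattern set serve both default and strict (constrained) floating-point lowering. A hedged illustration, assuming clang's strict exception mode (not part of the patch):

// Built with -ffp-exception-behavior=strict, this addition is emitted as a
// constrained intrinsic and reaches instruction selection as a STRICT_FADD
// node, which the any_fadd patterns match alongside the ordinary fadd.
float addStrict(float a, float b) {
  return a + b;
}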
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 798532d5bc44..9094dff1dda1 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -105,7 +105,6 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   // Floating point environment registers.
   markSuperRegs(Reserved, RISCV::FRM);
   markSuperRegs(Reserved, RISCV::FFLAGS);
-  markSuperRegs(Reserved, RISCV::FCSR);
 
   assert(checkAllSuperRegsMarked(Reserved));
   return Reserved;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index a56f992d320e..20903b317180 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -550,16 +550,15 @@ def VRM8NoV0 : VReg<[vint8m8_t, vint16m8_t, vint32m8_t, vint64m8_t,
                      vfloat16m8_t, vfloat32m8_t, vfloat64m8_t],
                     (add V8M8, V16M8, V24M8), 8>;
 
-defvar VMaskVTs = [vbool64_t, vbool32_t, vbool16_t, vbool8_t,
-                   vbool4_t, vbool2_t, vbool1_t];
+defvar VMaskVTs = [vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
+                   vbool32_t, vbool64_t];
 
 def VMV0 : RegisterClass<"RISCV", VMaskVTs, 64, (add V0)> {
   let Size = 64;
 }
 
 // The register class is added for inline assembly for vector mask types.
-def VM : VReg<[vbool1_t, vbool2_t, vbool4_t, vbool8_t, vbool16_t,
-               vbool32_t, vbool64_t],
+def VM : VReg<VMaskVTs,
               (add (sequence "V%u", 8, 31),
                    (sequence "V%u", 0, 7)), 1>;
 
@@ -578,7 +577,6 @@ foreach m = LMULList.m in {
 // Special registers
 def FFLAGS : RISCVReg<0, "fflags">;
 def FRM    : RISCVReg<0, "frm">;
-def FCSR   : RISCVReg<0, "fcsr">;
 
 // Any type register. Used for .insn directives when we don't know what the
 // register types could be.
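The fcsr CSR is simply frm concatenated with fflags, so dropping the standalone FCSR register definition loses no architectural state; both subfields remain modeled above. A hypothetical helper showing the layout (not part of the patch):

#include <cstdint>

// Reassemble the combined fcsr view from the two modeled registers:
// frm occupies bits 7:5 and fflags occupies bits 4:0 of fcsr.
static inline uint32_t makeFCSR(uint32_t frm, uint32_t fflags) {
  return ((frm & 0x7) << 5) | (fflags & 0x1f);
}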
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 14f59152ed42..d5a0932c8778 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -16,7 +16,8 @@ def RocketModel : SchedMachineModel {
   let IssueWidth = 1; // 1 micro-op is dispatched per cycle.
   let LoadLatency = 3;
   let MispredictPenalty = 3;
-  let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg];
+  let CompleteModel = false;
+  let UnsupportedFeatures = [HasStdExtV, HasStdExtZvlsseg];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 5b435fcb16a2..7f9d0aabc4ed 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasStdExtV, HasStdExtZvamo, HasStdExtZvlsseg];
+  let UnsupportedFeatures = [HasStdExtV, HasStdExtZvlsseg];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index deb2a11f98f1..d0330e6984a5 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -51,7 +51,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasStdExtZbt = false;
   bool HasStdExtV = false;
   bool HasStdExtZvlsseg = false;
-  bool HasStdExtZvamo = false;
   bool HasStdExtZfhmin = false;
   bool HasStdExtZfh = false;
   bool HasRV64 = false;
@@ -118,7 +117,6 @@ public:
   bool hasStdExtZbt() const { return HasStdExtZbt; }
   bool hasStdExtV() const { return HasStdExtV; }
   bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
-  bool hasStdExtZvamo() const { return HasStdExtZvamo; }
   bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
   bool hasStdExtZfh() const { return HasStdExtZfh; }
   bool is64Bit() const { return HasRV64; }
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 56f0952fafc9..c435430a1288 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -162,3 +162,94 @@ InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
       getMemoryOpCost(Opcode, VTy->getElementType(), Alignment, 0, CostKind, I);
   return NumLoads * MemOpCost;
 }
+
+void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                                           TTI::UnrollingPreferences &UP,
+                                           OptimizationRemarkEmitter *ORE) {
+  // TODO: More tuning on benchmarks and metrics with changes as needed
+  //       would apply to all settings below to enable performance.
+
+  // Support explicit targets enabled for SiFive with the unrolling
+  // preferences below.
+  bool UseDefaultPreferences = true;
+  if (ST->getTuneCPU().contains("sifive-e76") ||
+      ST->getTuneCPU().contains("sifive-s76") ||
+      ST->getTuneCPU().contains("sifive-u74") ||
+      ST->getTuneCPU().contains("sifive-7"))
+    UseDefaultPreferences = false;
+
+  if (UseDefaultPreferences)
+    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
+
+  // Enable upper-bound unrolling universally, not dependent upon the
+  // conditions below.
+  UP.UpperBound = true;
+
+  // Disable loop unrolling for Oz and Os.
+  UP.OptSizeThreshold = 0;
+  UP.PartialOptSizeThreshold = 0;
+  if (L->getHeader()->getParent()->hasOptSize())
+    return;
+
+  SmallVector<BasicBlock *, 4> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  LLVM_DEBUG(dbgs() << "Loop has:\n"
+                    << "Blocks: " << L->getNumBlocks() << "\n"
+                    << "Exit blocks: " << ExitingBlocks.size() << "\n");
+
+  // Only allow another exit other than the latch. This acts as an early exit
+  // as it mirrors the profitability calculation of the runtime unroller.
+  if (ExitingBlocks.size() > 2)
+    return;
+
+  // Limit the CFG of the loop body for targets with a branch predictor.
+  // Allowing 4 blocks permits if-then-else diamonds in the body.
+  if (L->getNumBlocks() > 4)
+    return;
+
+  // Don't unroll vectorized loops, including the remainder loop.
+  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
+    return;
+
+  // Scan the loop: don't unroll loops with calls as this could prevent
+  // inlining.
+  InstructionCost Cost = 0;
+  for (auto *BB : L->getBlocks()) {
+    for (auto &I : *BB) {
+      // Initial setting - don't unroll loops containing vectorized
+      // instructions.
+      if (I.getType()->isVectorTy())
+        return;
+
+      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
+          if (!isLoweredToCall(F))
+            continue;
+        }
+        return;
+      }
+
+      SmallVector<const Value *> Operands(I.operand_values());
+      Cost +=
+          getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
+    }
+  }
+
+  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
+
+  UP.Partial = true;
+  UP.Runtime = true;
+  UP.UnrollRemainder = true;
+  UP.UnrollAndJam = true;
+  UP.UnrollAndJamInnerLoopThreshold = 60;
+
+  // Forcing unrolling of small loops can be very useful because of the
+  // branch-taken cost of the backedge.
+  if (Cost < 12)
+    UP.Force = true;
+}
+
+void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                                         TTI::PeelingPreferences &PP) {
+  BaseT::getPeelingPreferences(L, SE, PP);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 675681616d6e..7353496f4684 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -73,6 +73,13 @@ public:
     llvm_unreachable("Unsupported register kind");
   }
 
+  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
+                               TTI::UnrollingPreferences &UP,
+                               OptimizationRemarkEmitter *ORE);
+
+  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+                             TTI::PeelingPreferences &PP);
+
   unsigned getMinVectorRegisterBitWidth() const {
     return ST->hasVInstructions() ? ST->getMinRVVVectorSizeInBits() : 0;
   }
@@ -178,7 +185,9 @@ public:
   }
 
   unsigned getMaxInterleaveFactor(unsigned VF) {
-    return ST->getMaxInterleaveFactor();
+    // If the loop will not be vectorized, don't interleave it; let the
+    // regular unroller handle it instead.
+    return VF == 1 ? 1 : ST->getMaxInterleaveFactor();
   }
 };
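As a closing illustration (an assumed example, not from the patch), here is a loop shape that passes every check in the new getUnrollingPreferences when tuning for the SiFive 7-series cores: a single exiting block, fewer than four basic blocks, no calls, no vector types, and a size-and-latency cost below the Force threshold of 12:

// A small scalar loop: one exit, no calls, scalar body, tiny cost. The
// heuristics above would set UP.Partial, UP.Runtime and UP.UnrollRemainder,
// and, with Cost < 12, UP.Force as well.
int sumBytes(const unsigned char *p, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += p[i];
  return s;
}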
