| author | Dimitry Andric <dim@FreeBSD.org> | 2024-01-11 18:29:01 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-19 21:24:43 +0000 |
| commit | 52e4ee64c25fe0837e9cf783a63e8c214b3180cf (patch) | |
| tree | ef0a98fe51363441060377330e2e714855102bf6 /contrib/llvm-project/llvm/lib/Target | |
| parent | ed89c59104c13195cbbad881f64c6a71f687c1e4 (diff) | |
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target')
60 files changed, 1171 insertions, 647 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 90e1ce9ddf66..7d2ff146a340 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -256,6 +256,11 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
     if (BTE->getZExtValue())
       Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
 
+  if (const auto *GCS = mdconst::extract_or_null<ConstantInt>(
+          M.getModuleFlag("guarded-control-stack")))
+    if (GCS->getZExtValue())
+      Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+
   if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
           M.getModuleFlag("sign-return-address")))
     if (Sign->getZExtValue())
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index edc8cc7d4d1e..ea5679b4d5e3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -6834,10 +6834,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
     return getPackedVectorTypeFromPredicateType(
         Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
   case Intrinsic::aarch64_sve_ld1udq:
-  case Intrinsic::aarch64_sve_st1udq:
+  case Intrinsic::aarch64_sve_st1dq:
     return EVT(MVT::nxv1i64);
   case Intrinsic::aarch64_sve_ld1uwq:
-  case Intrinsic::aarch64_sve_st1uwq:
+  case Intrinsic::aarch64_sve_st1wq:
     return EVT(MVT::nxv1i32);
   }
 }
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 47e665176e8b..e2d07a096496 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4513,8 +4513,7 @@ static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) {
   SDLoc dl(N);
   SmallVector<SDValue, 8> Ops;
   for (unsigned i = 0; i != NumElts; ++i) {
-    ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i));
-    const APInt &CInt = C->getAPIntValue();
+    const APInt &CInt = N.getConstantOperandAPInt(i);
     // Element types smaller than 32 bits are not legal, so use i32 elements.
     // The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 1cfbf4737a6f..42b7a6418032 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4214,6 +4214,9 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { switch (FirstOpc) { default: return false; + case AArch64::LDRQui: + case AArch64::LDURQi: + return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi; case AArch64::LDRWui: case AArch64::LDURWi: return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index b435b3ce03e7..e90b8a8ca7ac 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1326,10 +1326,14 @@ static int alignTo(int Num, int PowOf2) { static bool mayAlias(MachineInstr &MIa, SmallVectorImpl<MachineInstr *> &MemInsns, AliasAnalysis *AA) { - for (MachineInstr *MIb : MemInsns) - if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) + for (MachineInstr *MIb : MemInsns) { + if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) { + LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump()); return true; + } + } + LLVM_DEBUG(dbgs() << "No aliases found\n"); return false; } @@ -1757,9 +1761,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; + LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump()); for (unsigned Count = 0; MBBI != E && Count < Limit; MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; + LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump()); UsedInBetween.accumulate(MI); @@ -1859,6 +1865,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, " + << "keep looking.\n"); continue; } // If the alignment requirements of the paired (scaled) instruction @@ -1868,6 +1876,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() + << "Offset doesn't fit due to alignment requirements, " + << "keep looking.\n"); continue; } } @@ -1884,14 +1895,22 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq( Reg, getLdStRegOp(MI).getReg()); - // If the Rt of the second instruction was not modified or used between - // the two instructions and none of the instructions between the second - // and first alias with the second, we can combine the second into the - // first. 
- if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) && - !(MI.mayLoad() && !SameLoadReg && - !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && - !mayAlias(MI, MemInsns, AA)) { + // If the Rt of the second instruction (destination register of the + // load) was not modified or used between the two instructions and none + // of the instructions between the second and first alias with the + // second, we can combine the second into the first. + bool RtNotModified = + ModifiedRegUnits.available(getLdStRegOp(MI).getReg()); + bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg && + !UsedRegUnits.available(getLdStRegOp(MI).getReg())); + + LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n" + << "Reg '" << getLdStRegOp(MI) << "' not modified: " + << (RtNotModified ? "true" : "false") << "\n" + << "Reg '" << getLdStRegOp(MI) << "' not used: " + << (RtNotUsed ? "true" : "false") << "\n"); + + if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) { // For pairs loading into the same reg, try to find a renaming // opportunity to allow the renaming of Reg between FirstMI and MI // and combine MI into FirstMI; otherwise bail and keep looking. @@ -1904,6 +1923,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() << "Can't find reg for renaming, " + << "keep looking.\n"); continue; } Flags.setRenameReg(*RenameReg); @@ -1919,10 +1940,15 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (!(MayLoad && - !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) && - !mayAlias(FirstMI, MemInsns, AA)) { + RtNotModified = !( + MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())); + LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n" + << "Reg '" << getLdStRegOp(FirstMI) + << "' not modified: " + << (RtNotModified ? "true" : "false") << "\n"); + + if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) { if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) { Flags.setMergeForward(true); Flags.clearRenameReg(); @@ -1938,8 +1964,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, MBBIWithRenameReg = MBBI; } } - // Unable to combine these instructions due to interference in between. - // Keep looking. + LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to " + << "interference in between, keep looking.\n"); } } @@ -1948,16 +1974,20 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. - if (MI.isCall()) + if (MI.isCall()) { + LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n"); return E; + } // Update modified / uses register units. LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. - if (!ModifiedRegUnits.available(BaseReg)) + if (!ModifiedRegUnits.available(BaseReg)) { + LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n"); return E; + } // Update list of instructions that read/write memory. 
     if (MI.mayLoadOrStore())
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
index 6fcd9c290e9c..6c6cd120b035 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
@@ -53,7 +53,7 @@ using namespace PatternMatch;
 #define DEBUG_TYPE "aarch64-loop-idiom-transform"
 
 static cl::opt<bool>
-    DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(true),
+    DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
                cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
 
 static cl::opt<bool> DisableByteCmp(
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index ee10a7d1c706..4782ad076c60 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1397,17 +1397,17 @@ let Predicates = [HasSVEorSME] in {
                   (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
   }
 
-  // ld1quw/st1quw
+  // ld1quw/st1qw
   defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
   defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
-  defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
-  defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+  defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+  defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
 
-  // ld1qud/st1qud
+  // ld1qud/st1qd
   defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
   defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
-  defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
-  defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+  defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+  defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
 
 } // End HasSVEorSME
@@ -4006,7 +4006,9 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>;
 defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>;
 defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>;
 defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>;
+} // End HasSVE2p1_or_HasSME2
 
+let Predicates = [HasSVEorSME] in {
 // Aliases for existing SVE instructions for which predicate-as-counter are
 // accepted as an operand to the instruction
 
@@ -4025,7 +4027,7 @@ def : InstAlias<"mov $Pd, $Pn",
 def : InstAlias<"pfalse\t$Pd",
                 (PFALSE PNRasPPR8:$Pd), 0>;
 
-} // End HasSVE2p1_or_HasSME2
+}
 
 //===----------------------------------------------------------------------===//
 // Non-widening BFloat16 to BFloat16 instructions
@@ -4095,7 +4097,7 @@ defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
 defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
 
 defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
-defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>;
+defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>;
 
 defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
 defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>;
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b5b8b6829178..13b5e578391d 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
   return &II;
 }
 
+// Simplify operations where predicate has all inactive lanes or try to replace
+// with _u form when all lanes are active
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
+                            Intrinsic::ID IID) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
+    // inactive for sv[func]_m
+    return IC.replaceInstUsesWith(II, II.getOperand(1));
+  }
+  return instCombineSVEAllActive(II, IID);
+}
+
 static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
                                                             IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
     return II_U;
   if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mla>(
@@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
 
 static std::optional<Instruction *>
 instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
     return II_U;
   if (auto FMLA =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *>
 instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
     return II_U;
   if (auto FMLS =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
                                                             IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
     return II_U;
   if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mls>(
@@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
   auto *OpMultiplicand = II.getOperand(1);
   auto *OpMultiplier = II.getOperand(2);
 
-  // Canonicalise a non _u intrinsic only.
-  if (II.getIntrinsicID() != IID)
-    if (auto II_U = instCombineSVEAllActive(II, IID))
-      return II_U;
-
   // Return true if a given instruction is a unit splat value, false otherwise.
auto IsUnitSplat = [](auto *I) { auto *SplatValue = getSplatValue(I); @@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_ptest_last: return instCombineSVEPTest(IC, II); case Intrinsic::aarch64_sve_fabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u); case Intrinsic::aarch64_sve_fadd: return instCombineSVEVectorFAdd(IC, II); case Intrinsic::aarch64_sve_fadd_u: return instCombineSVEVectorFAddU(IC, II); case Intrinsic::aarch64_sve_fdiv: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u); case Intrinsic::aarch64_sve_fmax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u); case Intrinsic::aarch64_sve_fmaxnm: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u); case Intrinsic::aarch64_sve_fmin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u); case Intrinsic::aarch64_sve_fminnm: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u); case Intrinsic::aarch64_sve_fmla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u); case Intrinsic::aarch64_sve_fmls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u); case Intrinsic::aarch64_sve_fmul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmulx: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u); case Intrinsic::aarch64_sve_fnmla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u); case Intrinsic::aarch64_sve_fnmls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u); case Intrinsic::aarch64_sve_fsub: return instCombineSVEVectorFSub(IC, II); case Intrinsic::aarch64_sve_fsub_u: @@ -1930,20 +1946,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, Intrinsic::aarch64_sve_mla_u>( IC, II, true); case Intrinsic::aarch64_sve_mla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u); case Intrinsic::aarch64_sve_mls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u); case Intrinsic::aarch64_sve_mul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_mul_u: return 
instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_sabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u); case Intrinsic::aarch64_sve_smax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u); case Intrinsic::aarch64_sve_smin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u); case Intrinsic::aarch64_sve_smulh: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u); case Intrinsic::aarch64_sve_sub: return instCombineSVEVectorSub(IC, II); case Intrinsic::aarch64_sve_sub_u: @@ -1951,31 +1971,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, Intrinsic::aarch64_sve_mls_u>( IC, II, true); case Intrinsic::aarch64_sve_uabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u); case Intrinsic::aarch64_sve_umax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u); case Intrinsic::aarch64_sve_umin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u); case Intrinsic::aarch64_sve_umulh: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u); case Intrinsic::aarch64_sve_asr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u); case Intrinsic::aarch64_sve_lsl: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u); case Intrinsic::aarch64_sve_lsr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u); case Intrinsic::aarch64_sve_and: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u); case Intrinsic::aarch64_sve_bic: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u); case Intrinsic::aarch64_sve_eor: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u); case Intrinsic::aarch64_sve_orr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u); case Intrinsic::aarch64_sve_sqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u); case Intrinsic::aarch64_sve_uqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u); case Intrinsic::aarch64_sve_tbl: return instCombineSVETBL(IC, II); case Intrinsic::aarch64_sve_uunpkhi: diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp 
b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b657a0954d78..302116447efc 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1166,7 +1166,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
   getActionDefinitionsBuilder(G_FMAD).lower();
 
   // Access to floating-point environment.
-  getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
+  getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
+                               G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
       .libcall();
 
   getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 496ab18e9b19..6e074b6a63c4 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -120,7 +120,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
   assert((!Target.getSymA() ||
           Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None ||
-          Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT) &&
+          Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT ||
+          Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) &&
          "Should only be expression-level modifiers here");
 
   assert((!Target.getSymB() ||
@@ -206,7 +207,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
     case FK_Data_2:
       return R_CLS(ABS16);
     case FK_Data_4:
-      return R_CLS(ABS32);
+      return (!IsILP32 &&
+              Target.getAccessVariant() == MCSymbolRefExpr::VK_GOTPCREL)
+                 ? ELF::R_AARCH64_GOTPCREL32
+                 : R_CLS(ABS32);
     case FK_Data_8:
       if (IsILP32) {
         Ctx.reportError(Fixup.getLoc(),
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 70f3c2c99f0f..44d9a8ac7cb6 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
 }
 
 class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
-    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+    : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
         asm, "\t$Zd, $Zn, $Zm", "", []>,
       Sched<[]> {
   bits<5> Zm;
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 0c77fe725958..b9411e205212 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -111,7 +111,7 @@ def smulu64 : GICombineRule<
   [{ return matchCombine_s_mul_u64(*${smul}, ${matchinfo}); }]),
   (apply [{ applyCombine_s_mul_u64(*${smul}, ${matchinfo}); }])>;
 
-def sign_exension_in_reg_matchdata : GIDefMatchData<"MachineInstr *">;
+def sign_exension_in_reg_matchdata : GIDefMatchData<"std::pair<MachineInstr *, unsigned>">;
 
 def sign_extension_in_reg : GICombineRule<
   (defs root:$sign_inreg, sign_exension_in_reg_matchdata:$matchinfo),
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 719ae2e8750c..41462d7a133e 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1579,13 +1579,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
 
 bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
                                           SDValue &SOffset) const {
-  if (Subtarget->hasRestrictedSOffset()) {
-    if (auto SOffsetConst = dyn_cast<ConstantSDNode>(ByteOffsetNode)) {
-      if (SOffsetConst->isZero()) {
-        SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
-        return true;
-      }
-    }
+  if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
+    SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
+    return true;
   }
 
   SOffset = ByteOffsetNode;
@@ -2483,7 +2479,7 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
     SDValue PtrBase = Ptr.getOperand(0);
     SDValue PtrOffset = Ptr.getOperand(1);
 
-    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
+    const APInt &OffsetVal = PtrOffset->getAsAPIntVal();
     if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
       N = glueCopyToM0(N, PtrBase);
       Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index d2a02143e4e7..5762f1906a16 100644
--- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -1026,6 +1026,51 @@ public:
     return N;
   }
 
+  /// Strip "amdgpu-no-lds-kernel-id" from any functions where we may have
+  /// introduced its use. If AMDGPUAttributor ran prior to the pass, we inferred
+  /// the lack of llvm.amdgcn.lds.kernel.id calls.
+ void removeNoLdsKernelIdFromReachable(CallGraph &CG, Function *KernelRoot) { + KernelRoot->removeFnAttr("amdgpu-no-lds-kernel-id"); + + SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()}); + if (!Tmp.back()) + return; + + SmallPtrSet<Function *, 8> Visited; + bool SeenUnknownCall = false; + + do { + Function *F = Tmp.pop_back_val(); + + for (auto &N : *CG[F]) { + if (!N.second) + continue; + + Function *Callee = N.second->getFunction(); + if (!Callee) { + if (!SeenUnknownCall) { + SeenUnknownCall = true; + + // If we see any indirect calls, assume nothing about potential + // targets. + // TODO: This could be refined to possible LDS global users. + for (auto &N : *CG.getExternalCallingNode()) { + Function *PotentialCallee = N.second->getFunction(); + if (!isKernelLDS(PotentialCallee)) + PotentialCallee->removeFnAttr("amdgpu-no-lds-kernel-id"); + } + + continue; + } + } + + Callee->removeFnAttr("amdgpu-no-lds-kernel-id"); + if (Visited.insert(Callee).second) + Tmp.push_back(Callee); + } + } while (!Tmp.empty()); + } + DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables( Module &M, LDSUsesInfoTy &LDSUsesInfo, DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS, @@ -1175,6 +1220,13 @@ public: M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement); replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered, LookupTable); + + // Strip amdgpu-no-lds-kernel-id from all functions reachable from the + // kernel. We may have inferred this wasn't used prior to the pass. + // + // TODO: We could filter out subgraphs that do not access LDS globals. + for (Function *F : KernelsThatAllocateTableLDS) + removeNoLdsKernelIdFromReachable(CG, F); } DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS = diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp index 21bfab52c6c4..bb1d6cb72e80 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp @@ -99,10 +99,10 @@ public: // Combine unsigned buffer load and signed extension instructions to generate // signed buffer laod instructions. - bool matchCombineSignExtendInReg(MachineInstr &MI, - MachineInstr *&MatchInfo) const; - void applyCombineSignExtendInReg(MachineInstr &MI, - MachineInstr *&MatchInfo) const; + bool matchCombineSignExtendInReg( + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const; + void applyCombineSignExtendInReg( + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const; // Find the s_mul_u64 instructions where the higher bits are either // zero-extended or sign-extended. @@ -395,34 +395,36 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize( // Identify buffer_load_{u8, u16}. bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg( - MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const { - Register Op0Reg = MI.getOperand(1).getReg(); - SubwordBufferLoad = MRI.getVRegDef(Op0Reg); - - if (!MRI.hasOneNonDBGUse(Op0Reg)) + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const { + Register LoadReg = MI.getOperand(1).getReg(); + if (!MRI.hasOneNonDBGUse(LoadReg)) return false; // Check if the first operand of the sign extension is a subword buffer load // instruction. 
- return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE || - SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT; + MachineInstr *LoadMI = MRI.getVRegDef(LoadReg); + int64_t Width = MI.getOperand(2).getImm(); + switch (LoadMI->getOpcode()) { + case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE: + MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE}; + return Width == 8; + case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT: + MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT}; + return Width == 16; + } + return false; } // Combine buffer_load_{u8, u16} and the sign extension instruction to generate // buffer_load_{i8, i16}. void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg( - MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const { - // Modify the opcode and the destination of buffer_load_{u8, u16}: - // Replace the opcode. - unsigned Opc = - SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE - ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE - : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT; - SubwordBufferLoad->setDesc(TII.get(Opc)); - // Update the destination register of SubwordBufferLoad with the destination - // register of the sign extension. + MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const { + auto [LoadMI, NewOpcode] = MatchData; + LoadMI->setDesc(TII.get(NewOpcode)); + // Update the destination register of the load with the destination register + // of the sign extension. Register SignExtendInsnDst = MI.getOperand(0).getReg(); - SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst); + LoadMI->getOperand(0).setReg(SignExtendInsnDst); // Remove the sign extension. MI.eraseFromParent(); } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index b7f043860115..ba79affe683d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -1342,10 +1342,8 @@ private: unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); bool ParseRegRange(unsigned& Num, unsigned& Width); - unsigned getRegularReg(RegisterKind RegKind, - unsigned RegNum, - unsigned RegWidth, - SMLoc Loc); + unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg, + unsigned RegWidth, SMLoc Loc); bool isRegister(); bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; @@ -2616,6 +2614,8 @@ AMDGPUAsmParser::isRegister(const AsmToken &Token, StringRef RegName = Reg->Name; StringRef RegSuffix = Str.substr(RegName.size()); if (!RegSuffix.empty()) { + RegSuffix.consume_back(".l"); + RegSuffix.consume_back(".h"); unsigned Num; // A single register with an index: rXX if (getRegNum(RegSuffix, Num)) @@ -2636,12 +2636,9 @@ AMDGPUAsmParser::isRegister() return isRegister(getToken(), peekToken()); } -unsigned -AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, - unsigned RegNum, - unsigned RegWidth, - SMLoc Loc) { - +unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum, + unsigned SubReg, unsigned RegWidth, + SMLoc Loc) { assert(isRegularReg(RegKind)); unsigned AlignSize = 1; @@ -2670,7 +2667,17 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, return AMDGPU::NoRegister; } - return RC.getRegister(RegIdx); + unsigned Reg = RC.getRegister(RegIdx); + + if (SubReg) { + Reg = TRI->getSubReg(Reg, SubReg); + + // 
Currently all regular registers have their .l and .h subregisters, so + // we should never need to generate an error here. + assert(Reg && "Invalid subregister!"); + } + + return Reg; } bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { @@ -2748,7 +2755,17 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, RegKind = RI->Kind; StringRef RegSuffix = RegName.substr(RI->Name.size()); + unsigned SubReg = NoSubRegister; if (!RegSuffix.empty()) { + // We don't know the opcode till we are done parsing, so we don't know if + // registers should be 16 or 32 bit. It is therefore mandatory to put .l or + // .h to correctly specify 16 bit registers. We also can't determine class + // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16. + if (RegSuffix.consume_back(".l")) + SubReg = AMDGPU::lo16; + else if (RegSuffix.consume_back(".h")) + SubReg = AMDGPU::hi16; + // Single 32-bit register: vXX. if (!getRegNum(RegSuffix, RegNum)) { Error(Loc, "invalid register index"); @@ -2761,7 +2778,7 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, return AMDGPU::NoRegister; } - return getRegularReg(RegKind, RegNum, RegWidth, Loc); + return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc); } unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, @@ -2813,7 +2830,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, } if (isRegularReg(RegKind)) - Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); + Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc); return Reg; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index a7d8ff0242b8..bcd93e30d6c2 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) { return false; return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI); }; - auto IsExpiredFn = [](const MachineInstr &I, int) { + bool LdsdirCanWait = ST.hasLdsWaitVMSRC(); + auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) { return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) || (I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) || (I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR && - AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0); + AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) || + (LdsdirCanWait && SIInstrInfo::isLDSDIR(I) && + !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm()); }; if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) == std::numeric_limits<int>::max()) return false; - BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - TII.get(AMDGPU::S_WAITCNT_DEPCTR)) - .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); + if (LdsdirCanWait) { + TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0); + } else { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII.get(AMDGPU::S_WAITCNT_DEPCTR)) + .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0)); + } return true; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h index f6f37f5170a4..85d062a9a6f5 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -1128,6 +1128,8 
@@ public: bool hasLdsDirect() const { return getGeneration() >= GFX11; } + bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; } + bool hasVALUPartialForwardingHazard() const { return getGeneration() >= GFX11; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp index d539d75fdff0..201cc8d01e2d 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp @@ -31,7 +31,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT, InlineAsmEnd = ";#ASMEND"; //===--- Data Emission Directives -------------------------------------===// - SunStyleELFSectionSwitchSyntax = true; UsesELFSectionDirectiveForBSS = true; //===--- Global Variable Emission Directives --------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 6ddc7e864fb2..5a9222e91588 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -8181,12 +8181,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, // SGPR_NULL to avoid generating an extra s_mov with zero. static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG, const GCNSubtarget *Subtarget) { - if (Subtarget->hasRestrictedSOffset()) - if (auto SOffsetConst = dyn_cast<ConstantSDNode>(SOffset)) { - if (SOffsetConst->isZero()) { - return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32); - } - } + if (Subtarget->hasRestrictedSOffset() && isNullConstant(SOffset)) + return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32); return SOffset; } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 1cb1d32707f2..1f480c248154 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -292,7 +292,7 @@ public: VgprVmemTypes[GprNo] = 0; } - void setNonKernelFunctionInitialState() { + void setStateOnFunctionEntryOrReturn() { setScoreUB(VS_CNT, getWaitCountMax(VS_CNT)); PendingEvents |= WaitEventMaskForInst[VS_CNT]; } @@ -1487,6 +1487,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst, if (callWaitsOnFunctionReturn(Inst)) { // Act as a wait on everything ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt()); + ScoreBrackets->setStateOnFunctionEntryOrReturn(); } else { // May need to way wait for anything. 
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt()); @@ -1879,7 +1880,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) { auto NonKernelInitialState = std::make_unique<WaitcntBrackets>(ST, Limits, Encoding); - NonKernelInitialState->setNonKernelFunctionInitialState(); + NonKernelInitialState->setStateOnFunctionEntryOrReturn(); BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState); Modified = true; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index fee900b3efb2..e50f5f28e030 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5276,10 +5276,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64; case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64; case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64; - case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64; - case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64; - case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64; - case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64; + case AMDGPU::S_CEIL_F16: + return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64 + : AMDGPU::V_CEIL_F16_fake16_e64; + case AMDGPU::S_FLOOR_F16: + return AMDGPU::V_FLOOR_F16_fake16_e64; + case AMDGPU::S_TRUNC_F16: + return AMDGPU::V_TRUNC_F16_fake16_e64; + case AMDGPU::S_RNDNE_F16: + return AMDGPU::V_RNDNE_F16_fake16_e64; case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64; case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64; case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64; @@ -5328,15 +5333,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const { case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64; case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64; case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64; - case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64; + case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64; case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64; - case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64; + case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64; case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64; - case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64; + case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64; case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64; - case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64; + case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64; case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64; - case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_t16_e64; + case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64; } llvm_unreachable( "Unexpected scalar opcode without corresponding vector one!"); @@ -7266,8 +7271,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist, if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0_modifiers) >= 0) NewInstr.addImm(0); - if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0) - NewInstr->addOperand(Inst.getOperand(1)); + if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) { + MachineOperand Src = Inst.getOperand(1); + if (AMDGPU::isTrue16Inst(NewOpcode) && ST.useRealTrue16Insts() && + Src.isReg() && RI.isVGPR(MRI, Src.getReg())) + 
NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16); + else + NewInstr->addOperand(Src); + } if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) { // We are converting these to a BFE, so we need to add the missing diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td index f07b8fa0ea4c..04c92155f5aa 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -1773,28 +1773,27 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC, class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod, - Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel, - bit IsVOP3P> { + Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> { // getInst64 handles clamp and omod. implicit mutex between vop3p and omod dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, Src0Mod, Src1Mod, Src2Mod>.ret; dag opsel = (ins op_sel0:$op_sel); - dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); - dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi)); - - dag ret = !con(base, - !if(HasOpSel, opsel,(ins)), - !if(IsVOP3P, vop3pFields,(ins))); + dag ret = !con(base, !if(HasOpSel, opsel, (ins))); } class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC, RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel, Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> { - dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, + dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, - 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, - HasOpSel, 1/*IsVOP3P*/>.ret; + 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret; + + dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi); + dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi); + + dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg); + dag ret = !con(base, vop3pFields); } class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, @@ -1804,7 +1803,7 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC, dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs, HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod, - Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret; + Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret; } class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC, @@ -2390,9 +2389,15 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> { field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP, NumSrcArgs, HasModifiers, Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret; - field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, + defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret; + defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP, + Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel, + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret; + + field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase); + field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, 
NumSrcArgs>.ret; field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td index 27a7c29cb1ac..99960c94e598 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -74,6 +74,7 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo // copy relevant pseudo op flags let SubtargetPredicate = ps.SubtargetPredicate; + let OtherPredicates = ps.OtherPredicates; let AsmMatchConverter = ps.AsmMatchConverter; let AsmVariantName = ps.AsmVariantName; let Constraints = ps.Constraints; @@ -157,8 +158,11 @@ multiclass VOP1Inst_t16<string opName, let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in { defm NAME : VOP1Inst<opName, P, node>; } - let OtherPredicates = [HasTrue16BitInsts] in { - defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>; + let OtherPredicates = [UseRealTrue16Insts] in { + defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>; + } + let OtherPredicates = [UseFakeTrue16Insts] in { + defm _fake16 : VOP1Inst<opName#"_fake16", VOPProfile_Fake16<P>, node>; } } @@ -679,6 +683,7 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1 let SchedRW = ps.SchedRW; let Uses = ps.Uses; let TRANS = ps.TRANS; + let OtherPredicates = ps.OtherPredicates; bits<8> vdst; let Inst{8-0} = 0xfa; @@ -707,6 +712,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> : let Defs = ps.Defs; let SchedRW = ps.SchedRW; let Uses = ps.Uses; + let OtherPredicates = ps.OtherPredicates; bits<8> vdst; let Inst{8-0} = fi; @@ -742,7 +748,9 @@ multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.AsmOperands in { + let AsmString = asmName # ps.AsmOperands, + DecoderNamespace = Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_e32<Gen, op, opName>; } } @@ -761,7 +769,9 @@ multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.Pfl.AsmDPP16 in { + let AsmString = asmName # ps.Pfl.AsmDPP16, + DecoderNamespace = "DPP" # Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_dpp<Gen, op, opName>; } } @@ -774,7 +784,9 @@ multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> { multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName, string asmName> { defvar ps = !cast<VOP1_Pseudo>(opName#"_e32"); - let AsmString = asmName # ps.Pfl.AsmDPP8 in { + let AsmString = asmName # ps.Pfl.AsmDPP8, + DecoderNamespace = "DPP8" # Gen.DecoderNamespace # + !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in { defm NAME : VOP1_Real_dpp8<Gen, op, opName>; } } @@ -854,29 +866,30 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b, "V_FFBH_I32", "v_cls_i32">; defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>; defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">; -defm V_NOT_B16_t16 : 
VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">; -defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">; -defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; +defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">; +defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">; +defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">; defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">; defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">; defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">; defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">; -defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; -defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; -defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">; -defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; -defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; -defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">; +defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">; +defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">; +defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">; +defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">; +defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">; +defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">; defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">; -defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; +defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">; defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; -defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">; -defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">; -defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">; -defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">; -defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">; -defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; +defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">; +defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">; +defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">; +defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">; +defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">; +defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">; +defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">; defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">; defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">; diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td index ecee61daa1c8..48d4e259bc1c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ 
b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -111,8 +111,8 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> : VOP2_Real <ps, Gen.Subtarget, real_name> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -437,7 +437,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v let InsDPP16 = !con(InsDPP, (ins FI:$fi)); let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3, 0, HasModifiers, HasModifiers, HasOMod, - Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret; + Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret; // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X); let InsVOPDXDeferred = @@ -1275,8 +1275,8 @@ class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget, class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName, VOPProfile p = ps.Pfl> : VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = "DPP"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1304,8 +1304,8 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps, class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pfl> : VOP2_DPP8<op, ps, p> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = "DPP8"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } diff --git a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td index fd4626d902ac..c4b9e7063093 100644 --- a/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/contrib/llvm-project/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -208,8 +208,8 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni class VOP3_Real_Gen <VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> : VOP3_Real <ps, Gen.Subtarget, asm_name> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []); let DecoderNamespace = Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1340,8 +1340,8 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget, class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen, string opName = ps.OpName> : VOP3_DPP16 <op, ps, Gen.Subtarget, opName> { - let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate); + let AssemblerPredicate = Gen.AssemblerPredicate; + let OtherPredicates = !if(ps.Pfl.IsRealTrue16, 
[UseRealTrue16Insts], []); let DecoderNamespace = "DPP"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"); } @@ -1470,9 +1470,8 @@ multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName, let AsmString = asmName # ps.Pfl.AsmVOP3DPP8, DecoderNamespace = "DPP8"#Gen.DecoderNamespace# !if(ps.Pfl.IsRealTrue16, "", "_FAKE16"), - AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts, - Gen.AssemblerPredicate) in { - + OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], + [TruePredicate]) in { defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>; } } diff --git a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp index 568085bd0ab3..f8a281032c77 100644 --- a/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -9577,8 +9577,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { SmallVector<SDValue, 8> Ops; SDLoc dl(N); for (unsigned i = 0; i != NumElts; ++i) { - ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); - const APInt &CInt = C->getAPIntValue(); + const APInt &CInt = N->getConstantOperandAPInt(i); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); @@ -18080,8 +18079,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D SDValue Op0 = CMOV->getOperand(0); SDValue Op1 = CMOV->getOperand(1); - auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2)); - auto CC = CCNode->getAPIntValue().getLimitedValue(); + auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue(); SDValue CmpZ = CMOV->getOperand(4); // The compare must be against zero. @@ -20109,8 +20107,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, // The operand to BFI is already a mask suitable for removing the bits it // sets. - ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2)); - const APInt &Mask = CI->getAPIntValue(); + const APInt &Mask = Op.getConstantOperandAPInt(2); Known.Zero &= Mask; Known.One &= Mask; return; diff --git a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp index e68904863cfc..fc066f001316 100644 --- a/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -1149,15 +1149,10 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // but they are different from CMP. // FIXME: since we're doing a post-processing, use a pseudoinstr here, so // lowering & isel wouldn't diverge. 
- bool andCC = false; - if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { - if (RHSC->isZero() && LHS.hasOneUse() && - (LHS.getOpcode() == ISD::AND || - (LHS.getOpcode() == ISD::TRUNCATE && - LHS.getOperand(0).getOpcode() == ISD::AND))) { - andCC = true; - } - } + bool andCC = isNullConstant(RHS) && LHS.hasOneUse() && + (LHS.getOpcode() == ISD::AND || + (LHS.getOpcode() == ISD::TRUNCATE && + LHS.getOperand(0).getOpcode() == ISD::AND)); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); SDValue TargetCC; SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG); diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index c65090d915ef..34c5569b8076 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -2019,9 +2019,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, DL, RetTy, Args, Outs, retAlignment, HasVAArgs ? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair( - CLI.NumFixedArgs, - cast<ConstantSDNode>(VADeclareParam->getOperand(1)) - ->getAPIntValue())) + CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1))) : std::nullopt, *CB, UniqueCallSite); const char *ProtoStr = nvTM->getStrPool().save(Proto).data(); @@ -2297,7 +2295,7 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op, if (VT == MVT::v2f16 || VT == MVT::v2bf16) Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt(); else if (VT == MVT::v2i16 || VT == MVT::v4i8) - Value = cast<ConstantSDNode>(Operand)->getAPIntValue(); + Value = Operand->getAsAPIntVal(); else llvm_unreachable("Unsupported type"); // i8 values are carried around as i16, so we need to zero out upper bits, diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td index 13665985f52e..e1cced327544 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -164,6 +164,9 @@ def True : Predicate<"true">; class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>; class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>; +// Explicit records for arch-accelerated SM versions +def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">; + // non-sync shfl instructions are not available on sm_70+ in PTX6.4+ def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70" "&& Subtarget->getPTXVersion() >= 64)">; diff --git a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td index 85eae44f349a..6b062a7f3912 100644 --- a/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -6727,3 +6727,16 @@ def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins), "mov.pred\t$d, %is_explicit_cluster;", [(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>, Requires<[hasSM<90>, hasPTX<78>]>; + +// setmaxnreg inc/dec intrinsics +let isConvergent = true in { +multiclass SET_MAXNREG<string Action, Intrinsic Intr> { + def : NVPTXInst<(outs), (ins i32imm:$reg_count), + "setmaxnreg." 
# Action # ".sync.aligned.u32 $reg_count;", + [(Intr timm:$reg_count)]>, + Requires<[hasSM90a, hasPTX<80>]>; +} + +defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>; +defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>; +} // isConvergent diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 235df1880b37..4e164fda1d8d 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -16241,7 +16241,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // Since we are doing this pre-legalize, the RHS can be a constant of // arbitrary bitwidth which may cause issues when trying to get the value // from the underlying APInt. - auto RHSAPInt = cast<ConstantSDNode>(RHS)->getAPIntValue(); + auto RHSAPInt = RHS->getAsAPIntVal(); if (!RHSAPInt.isIntN(64)) break; diff --git a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td index b1601739fd45..bf756e39bd5d 100644 --- a/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1909,7 +1909,7 @@ def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$RST, gprc:$RA, u5imm:$RB), "stwat $RST, $RA, $RB", IIC_LdStStore>, Requires<[IsISA3_0]>; -let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in +let isTrap = 1, hasCtrlDep = 1 in def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>; def TWI : DForm_base<3, (outs), (ins u5imm:$RST, gprc:$RA, s16imm:$D, variable_ops), diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index d616aaeddf41..7d42481db57f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -199,6 +199,8 @@ class RISCVAsmParser : public MCTargetAsmParser { ParseStatus parseInsnDirectiveOpcode(OperandVector &Operands); ParseStatus parseInsnCDirectiveOpcode(OperandVector &Operands); ParseStatus parseGPRAsFPR(OperandVector &Operands); + template <bool IsRV64Inst> ParseStatus parseGPRPair(OperandVector &Operands); + ParseStatus parseGPRPair(OperandVector &Operands, bool IsRV64Inst); ParseStatus parseFRMArg(OperandVector &Operands); ParseStatus parseFenceArg(OperandVector &Operands); ParseStatus parseReglist(OperandVector &Operands); @@ -466,6 +468,12 @@ public: bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; } + bool isGPRPair() const { + return Kind == KindTy::Register && + RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains( + Reg.RegNum); + } + static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm, RISCVMCExpr::VariantKind &VK) { if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) { @@ -1295,11 +1303,15 @@ unsigned RISCVAsmParser::checkTargetMatchPredicate(MCInst &Inst) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); for (unsigned I = 0; I < MCID.NumOperands; ++I) { - if (MCID.operands()[I].RegClass == RISCV::GPRPF64RegClassID) { + if (MCID.operands()[I].RegClass == RISCV::GPRPairRegClassID) { const auto &Op = Inst.getOperand(I); assert(Op.isReg()); MCRegister Reg = Op.getReg(); + if (RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(Reg)) + continue; + + // FIXME: We 
should form a paired register during parsing/matching. if (((Reg.id() - RISCV::X0) & 1) != 0) return Match_RequiresEvenGPRs; } @@ -2222,6 +2234,48 @@ ParseStatus RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) { return ParseStatus::Success; } +template <bool IsRV64> +ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands) { + return parseGPRPair(Operands, IsRV64); +} + +ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands, + bool IsRV64Inst) { + // If this is not an RV64 GPRPair instruction, don't parse as a GPRPair on + // RV64 as it will prevent matching the RV64 version of the same instruction + // that doesn't use a GPRPair. + // If this is an RV64 GPRPair instruction, there is no RV32 version so we can + // still parse as a pair. + if (!IsRV64Inst && isRV64()) + return ParseStatus::NoMatch; + + if (getLexer().isNot(AsmToken::Identifier)) + return ParseStatus::NoMatch; + + StringRef Name = getLexer().getTok().getIdentifier(); + MCRegister RegNo = matchRegisterNameHelper(isRVE(), Name); + + if (!RegNo) + return ParseStatus::NoMatch; + + if (!RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(RegNo)) + return ParseStatus::NoMatch; + + if ((RegNo - RISCV::X0) & 1) + return TokError("register must be even"); + + SMLoc S = getLoc(); + SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size()); + getLexer().Lex(); + + const MCRegisterInfo *RI = getContext().getRegisterInfo(); + unsigned Pair = RI->getMatchingSuperReg( + RegNo, RISCV::sub_gpr_even, + &RISCVMCRegisterClasses[RISCV::GPRPairRegClassID]); + Operands.push_back(RISCVOperand::createReg(Pair, S, E)); + return ParseStatus::Success; +} + ParseStatus RISCVAsmParser::parseFRMArg(OperandVector &Operands) { if (getLexer().isNot(AsmToken::Identifier)) return TokError( @@ -3335,27 +3389,6 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst, return Error(Loc, "Operand must be constant 4."); } - bool IsAMOCAS_D = Opcode == RISCV::AMOCAS_D || Opcode == RISCV::AMOCAS_D_AQ || - Opcode == RISCV::AMOCAS_D_RL || - Opcode == RISCV::AMOCAS_D_AQ_RL; - bool IsAMOCAS_Q = Opcode == RISCV::AMOCAS_Q || Opcode == RISCV::AMOCAS_Q_AQ || - Opcode == RISCV::AMOCAS_Q_RL || - Opcode == RISCV::AMOCAS_Q_AQ_RL; - if ((!isRV64() && IsAMOCAS_D) || IsAMOCAS_Q) { - unsigned Rd = Inst.getOperand(0).getReg(); - unsigned Rs2 = Inst.getOperand(2).getReg(); - assert(Rd >= RISCV::X0 && Rd <= RISCV::X31); - if ((Rd - RISCV::X0) % 2 != 0) { - SMLoc Loc = Operands[1]->getStartLoc(); - return Error(Loc, "The destination register must be even."); - } - assert(Rs2 >= RISCV::X0 && Rs2 <= RISCV::X31); - if ((Rs2 - RISCV::X0) % 2 != 0) { - SMLoc Loc = Operands[2]->getStartLoc(); - return Error(Loc, "The source register must be even."); - } - } - const MCInstrDesc &MCID = MII.get(Opcode); if (!(MCID.TSFlags & RISCVII::ConstraintMask)) return false; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index ed80da14c795..4dd039159e29 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -171,7 +171,7 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } -static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint32_t RegNo, +static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) 
{ if (RegNo >= 32 || RegNo & 1) @@ -546,6 +546,10 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size, !STI.hasFeature(RISCV::Feature64Bit), DecoderTableRV32Zdinx32, "RV32Zdinx table (Double in Integer and rv32)"); + TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZacas) && + !STI.hasFeature(RISCV::Feature64Bit), + DecoderTableRV32Zacas32, + "RV32Zacas table (Compare-And-Swap and rv32)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32, "RVZfinx table (Float in Integer)"); TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index ab8070772fe5..ae02e86baf6e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -47,10 +47,50 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) const LLT s32 = LLT::scalar(32); const LLT s64 = LLT::scalar(64); + const LLT nxv1s8 = LLT::scalable_vector(1, s8); + const LLT nxv2s8 = LLT::scalable_vector(2, s8); + const LLT nxv4s8 = LLT::scalable_vector(4, s8); + const LLT nxv8s8 = LLT::scalable_vector(8, s8); + const LLT nxv16s8 = LLT::scalable_vector(16, s8); + const LLT nxv32s8 = LLT::scalable_vector(32, s8); + const LLT nxv64s8 = LLT::scalable_vector(64, s8); + + const LLT nxv1s16 = LLT::scalable_vector(1, s16); + const LLT nxv2s16 = LLT::scalable_vector(2, s16); + const LLT nxv4s16 = LLT::scalable_vector(4, s16); + const LLT nxv8s16 = LLT::scalable_vector(8, s16); + const LLT nxv16s16 = LLT::scalable_vector(16, s16); + const LLT nxv32s16 = LLT::scalable_vector(32, s16); + + const LLT nxv1s32 = LLT::scalable_vector(1, s32); + const LLT nxv2s32 = LLT::scalable_vector(2, s32); + const LLT nxv4s32 = LLT::scalable_vector(4, s32); + const LLT nxv8s32 = LLT::scalable_vector(8, s32); + const LLT nxv16s32 = LLT::scalable_vector(16, s32); + + const LLT nxv1s64 = LLT::scalable_vector(1, s64); + const LLT nxv2s64 = LLT::scalable_vector(2, s64); + const LLT nxv4s64 = LLT::scalable_vector(4, s64); + const LLT nxv8s64 = LLT::scalable_vector(8, s64); + using namespace TargetOpcode; + auto AllVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8, + nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16, + nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32, + nxv1s64, nxv2s64, nxv4s64, nxv8s64}; + getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR}) .legalFor({s32, sXLen}) + .legalIf(all( + typeInSet(0, AllVecTys), + LegalityPredicate([=, &ST](const LegalityQuery &Query) { + return ST.hasVInstructions() && + (Query.Types[0].getScalarSizeInBits() != 64 || + ST.hasVInstructionsI64()) && + (Query.Types[0].getElementCount().getKnownMinValue() != 1 || + ST.getELen() == 64); + }))) .widenScalarToNextPow2(0) .clampScalar(0, s32, sXLen); diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp index 0799267eaf7c..76e5b3ed4025 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp @@ -106,6 +106,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx, if (Expr->getKind() == MCExpr::Target && cast<RISCVMCExpr>(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL) return 
ELF::R_RISCV_32_PCREL; + if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) + return ELF::R_RISCV_GOT32_PCREL; return ELF::R_RISCV_32; case FK_Data_8: return ELF::R_RISCV_64; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 9db5148208b3..961b8f0afe22 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -37,6 +37,13 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S, auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend()); setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features, MAB.getTargetOptions().getABIName())); + // `j label` in `.option norelax; j label; .option relax; ...; label:` needs a + // relocation to ensure the jump target is correct after linking. This is due + // to a limitation that shouldForceRelocation has to make the decision upfront + // without knowing a possibly future .option relax. When RISCVAsmParser is used, + // its ParseInstruction may call setForceRelocs as well. + if (STI.hasFeature(RISCV::FeatureRelax)) + static_cast<RISCVAsmBackend &>(MAB).setForceRelocs(); } RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp index 103a2e2da7b9..ed2b1ceb7d6f 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp @@ -308,8 +308,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { DebugLoc DL = MBBI->getDebugLoc(); const TargetRegisterInfo *TRI = STI->getRegisterInfo(); - Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32); - Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi); + Register Lo = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even); + Register Hi = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd); BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW)) .addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill())) .addReg(MBBI->getOperand(1).getReg()) @@ -342,8 +344,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { DebugLoc DL = MBBI->getDebugLoc(); const TargetRegisterInfo *TRI = STI->getRegisterInfo(); - Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32); - Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi); + Register Lo = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even); + Register Hi = + TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd); // If the register of operand 1 is equal to the Lo register, then swap the // order of loading the Lo and Hi statements. 
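The RISC-V hunks above (the GPRPair assembler parsing, the even-register check, and the Zdinx load/store expansion that splits a pair via sub_gpr_even/sub_gpr_odd) all rely on the same layout: a register pair starts at an even X register and its odd half is the next register. The following is a minimal, self-contained C++ sketch of that index arithmetic only; the helper name and layout below are illustrative and are not part of the patch, and it deliberately ignores the x0 special case (the patch pairs x0 with a dummy register rather than x1).

    #include <cassert>
    #include <optional>
    #include <utility>

    // Map the even X register that starts a GPR pair to its {even, odd} halves.
    // Returns std::nullopt for an odd index, mirroring the "register must be
    // even" diagnostic in parseGPRPair(). Illustrative only; the real patch
    // special-cases x0, which this toy helper does not.
    std::optional<std::pair<unsigned, unsigned>> pairHalves(unsigned XIndex) {
      assert(XIndex < 32 && "RISC-V has registers x0..x31");
      if (XIndex & 1)
        return std::nullopt;                     // odd base register rejected
      return std::make_pair(XIndex, XIndex + 1); // sub_gpr_even, sub_gpr_odd
    }

    int main() {
      auto P = pairHalves(10);                   // x10 pairs with x11 (X10_X11)
      assert(P && P->first == 10 && P->second == 11);
      assert(!pairHalves(11));                   // odd register cannot start a pair
      return 0;
    }

For example, x10/x11 corresponds to the X10_X11 record introduced later in RISCVRegisterInfo.td, while a pair operand written as x11 is rejected by the assembler.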
diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td index bb7a3291085d..279509575bb5 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -736,6 +736,7 @@ def FeatureStdExtZacas def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">, AssemblerPredicate<(all_of FeatureStdExtZacas), "'Zacas' (Atomic Compare-And-Swap Instructions)">; +def NoStdExtZacas : Predicate<"!Subtarget->hasStdExtZacas()">; //===----------------------------------------------------------------------===// // Vendor extensions diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 0a1a466af591..cb9ffabc4123 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -138,7 +138,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.is64Bit()) addRegisterClass(MVT::f64, &RISCV::GPRRegClass); else - addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass); + addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass); } static const MVT::SimpleValueType BoolVecVTs[] = { @@ -814,8 +814,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT, Custom); setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom); - setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT, - ISD::SSUBSAT, ISD::USUBSAT}, + setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, + ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Legal); // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL" @@ -1185,8 +1185,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom); - setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT, - ISD::SSUBSAT, ISD::USUBSAT}, + setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT, + ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}, VT, Custom); setOperationAction(ISD::VSELECT, VT, Custom); @@ -5466,6 +5466,7 @@ static unsigned getRISCVVLOp(SDValue Op) { OP_CASE(SSUBSAT) OP_CASE(USUBSAT) OP_CASE(AVGFLOORU) + OP_CASE(AVGCEILU) OP_CASE(FADD) OP_CASE(FSUB) OP_CASE(FMUL) @@ -5570,7 +5571,7 @@ static bool hasMergeOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == - 125 && + 126 && RISCVISD::LAST_RISCV_STRICTFP_OPCODE - ISD::FIRST_TARGET_STRICTFP_OPCODE == 21 && @@ -5596,7 +5597,7 @@ static bool hasMaskOp(unsigned Opcode) { Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE && "not a RISC-V target specific op"); static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP == - 125 && + 126 && RISCVISD::LAST_RISCV_STRICTFP_OPCODE - ISD::FIRST_TARGET_STRICTFP_OPCODE == 21 && @@ -6461,6 +6462,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, return SplitVectorOp(Op, DAG); [[fallthrough]]; case ISD::AVGFLOORU: + case ISD::AVGCEILU: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -7023,8 +7025,7 @@ foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG, if (!NewConstOp) return SDValue(); - const APInt &NewConstAPInt = - 
cast<ConstantSDNode>(NewConstOp)->getAPIntValue(); + const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal(); if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes()) return SDValue(); @@ -7154,8 +7155,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { // is SETGE/SETLE to avoid an XORI. if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) && CCVal == ISD::SETLT) { - const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue(); - const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue(); + const APInt &TrueVal = TrueV->getAsAPIntVal(); + const APInt &FalseVal = FalseV->getAsAPIntVal(); if (TrueVal - 1 == FalseVal) return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV); if (TrueVal + 1 == FalseVal) @@ -16345,7 +16346,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI, Register SrcReg = MI.getOperand(2).getReg(); const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX - ? &RISCV::GPRPF64RegClass + ? &RISCV::GPRPairRegClass : &RISCV::FPR64RegClass; int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); @@ -16384,7 +16385,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI, Register HiReg = MI.getOperand(2).getReg(); const TargetRegisterClass *DstRC = - MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass + MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass : &RISCV::FPR64RegClass; int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF); @@ -18596,6 +18597,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(UREM_VL) NODE_NAME_CASE(XOR_VL) NODE_NAME_CASE(AVGFLOORU_VL) + NODE_NAME_CASE(AVGCEILU_VL) NODE_NAME_CASE(SADDSAT_VL) NODE_NAME_CASE(UADDSAT_VL) NODE_NAME_CASE(SSUBSAT_VL) @@ -18752,7 +18754,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, if (VT == MVT::f32 && Subtarget.hasStdExtZfinx()) return std::make_pair(0U, &RISCV::GPRF32RegClass); if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit()) - return std::make_pair(0U, &RISCV::GPRPF64RegClass); + return std::make_pair(0U, &RISCV::GPRPairRegClass); return std::make_pair(0U, &RISCV::GPRNoX0RegClass); case 'f': if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16) @@ -18934,7 +18936,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, // Subtarget into account. if (Res.second == &RISCV::GPRF16RegClass || Res.second == &RISCV::GPRF32RegClass || - Res.second == &RISCV::GPRPF64RegClass) + Res.second == &RISCV::GPRPairRegClass) return std::make_pair(Res.first, &RISCV::GPRRegClass); return Res; @@ -19362,6 +19364,11 @@ bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, return false; } +ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const { + // Zacas will use amocas.w which does not require extension. + return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND; +} + Register RISCVTargetLowering::getExceptionPointerRegister( const Constant *PersonalityFn) const { return RISCV::X10; @@ -20017,8 +20024,13 @@ unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT, } bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const { - // At the moment, the only scalable instruction GISel knows how to lower is - // ret with scalable argument. + + // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and + // G_XOR. 
+ unsigned Op = Inst.getOpcode(); + if (Op == Instruction::Add || Op == Instruction::Sub || + Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor) + return false; if (Inst.getType()->isScalableTy()) return true; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h index 5d51fe168b04..c65953e37b17 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -255,6 +255,8 @@ enum NodeType : unsigned { // Averaging adds of unsigned integers. AVGFLOORU_VL, + // Rounding averaging adds of unsigned integers. + AVGCEILU_VL, MULHS_VL, MULHU_VL, @@ -631,9 +633,7 @@ public: return ISD::SIGN_EXTEND; } - ISD::NodeType getExtendForAtomicCmpSwapArg() const override { - return ISD::SIGN_EXTEND; - } + ISD::NodeType getExtendForAtomicCmpSwapArg() const override; bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override; diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index e591aa935c0b..6c9e529e4bfb 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -1464,20 +1464,6 @@ static void doUnion(DemandedFields &A, DemandedFields B) { A.MaskPolicy |= B.MaskPolicy; } -static bool isNonZeroAVL(const MachineOperand &MO, - const MachineRegisterInfo &MRI) { - if (MO.isReg()) { - if (MO.getReg() == RISCV::X0) - return true; - if (MachineInstr *MI = MRI.getVRegDef(MO.getReg()); - MI && isNonZeroLoadImmediate(*MI)) - return true; - return false; - } - assert(MO.isImm()); - return 0 != MO.getImm(); -} - // Return true if we can mutate PrevMI to match MI without changing any the // fields which would be observed. static bool canMutatePriorConfig(const MachineInstr &PrevMI, @@ -1491,21 +1477,26 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI, if (Used.VLAny) return false; - // We don't bother to handle the equally zero case here as it's largely - // uninteresting. if (Used.VLZeroness) { if (isVLPreservingConfig(PrevMI)) return false; - if (!isNonZeroAVL(MI.getOperand(1), MRI) || - !isNonZeroAVL(PrevMI.getOperand(1), MRI)) + if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI), + MRI)) return false; } - // TODO: Track whether the register is defined between - // PrevMI and MI. - if (MI.getOperand(1).isReg() && - RISCV::X0 != MI.getOperand(1).getReg()) - return false; + auto &AVL = MI.getOperand(1); + auto &PrevAVL = PrevMI.getOperand(1); + assert(MRI.isSSA()); + + // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI. + // For now just check that PrevMI uses the same virtual register. 
+ if (AVL.isReg() && AVL.getReg() != RISCV::X0) { + if (AVL.getReg().isPhysical()) + return false; + if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg()) + return false; + } } if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm()) diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 351f48c1708e..9813c7a70dfc 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -414,15 +414,16 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (RISCV::GPRPF64RegClass.contains(DstReg, SrcReg)) { - // Emit an ADDI for both parts of GPRPF64. + if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) { + // Emit an ADDI for both parts of GPRPair. BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), - TRI->getSubReg(DstReg, RISCV::sub_32)) - .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32), getKillRegState(KillSrc)) + TRI->getSubReg(DstReg, RISCV::sub_gpr_even)) + .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even), + getKillRegState(KillSrc)) .addImm(0); BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), - TRI->getSubReg(DstReg, RISCV::sub_32_hi)) - .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32_hi), + TRI->getSubReg(DstReg, RISCV::sub_gpr_odd)) + .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd), getKillRegState(KillSrc)) .addImm(0); return; @@ -607,7 +608,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? RISCV::SW : RISCV::SD; IsScalableVector = false; - } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { + } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxSD; IsScalableVector = false; } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { @@ -690,7 +691,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ? 
RISCV::LW : RISCV::LD; IsScalableVector = false; - } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) { + } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) { Opcode = RISCV::PseudoRV32ZdinxLD; IsScalableVector = false; } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index 4d0567e41abc..44552c00c62e 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -157,7 +157,16 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">; defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">; defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">; -let Predicates = [HasStdExtA] in { +defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>; +defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>; + /// Pseudo AMOs @@ -169,21 +178,6 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch), let hasSideEffects = 0; } -let Size = 20 in -def PseudoAtomicLoadNand32 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in -// AtomicOrdering.h. -def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; -def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>; -def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>; -def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; -def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; - class PseudoMaskedAMO : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> { @@ -224,6 +218,23 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst> (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering)>; +let Predicates = [HasStdExtA] in { + +let Size = 20 in +def PseudoAtomicLoadNand32 : PseudoAMO; +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. 
+def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>; +def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>; +def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>; +def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>; +def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>; + let Size = 28 in def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32, @@ -256,6 +267,43 @@ let Size = 36 in def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax; def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32, PseudoMaskedAtomicLoadUMin32>; +} // Predicates = [HasStdExtA] + +let Predicates = [HasStdExtA, IsRV64] in { + +let Size = 20 in +def PseudoAtomicLoadNand64 : PseudoAMO; +// Ordering constants must be kept in sync with the AtomicOrdering enum in +// AtomicOrdering.h. +def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>; +def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>; +def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>; +def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>; +def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)), + (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>; + +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64, + PseudoMaskedAtomicSwap32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64, + PseudoMaskedAtomicLoadAdd32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64, + PseudoMaskedAtomicLoadSub32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64, + PseudoMaskedAtomicLoadNand32>; +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64, + PseudoMaskedAtomicLoadMax32>; +def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64, + PseudoMaskedAtomicLoadMin32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64, + PseudoMaskedAtomicLoadUMax32>; +def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64, + PseudoMaskedAtomicLoadUMin32>; +} // Predicates = [HasStdExtA, IsRV64] + /// Compare and exchange @@ -285,9 +333,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst, (CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>; } +let Predicates = [HasStdExtA, NoStdExtZacas] in { def PseudoCmpXchg32 : PseudoCmpXchg; defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>; +} + +let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in { +def PseudoCmpXchg64 : PseudoCmpXchg; +defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; +} +let Predicates = [HasStdExtA] in { def PseudoMaskedCmpXchg32 : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, @@ -303,60 +359,9 @@ def : Pat<(int_riscv_masked_cmpxchg_i32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; - } // Predicates = [HasStdExtA] -defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, 
[IsRV64]>; -defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>; -defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>; - let Predicates = [HasStdExtA, IsRV64] in { - -/// 64-bit pseudo AMOs - -let Size = 20 in -def PseudoAtomicLoadNand64 : PseudoAMO; -// Ordering constants must be kept in sync with the AtomicOrdering enum in -// AtomicOrdering.h. -def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>; -def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>; -def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>; -def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>; -def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)), - (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>; - -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64, - PseudoMaskedAtomicSwap32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64, - PseudoMaskedAtomicLoadAdd32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64, - PseudoMaskedAtomicLoadSub32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64, - PseudoMaskedAtomicLoadNand32>; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64, - PseudoMaskedAtomicLoadMax32>; -def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64, - PseudoMaskedAtomicLoadMin32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64, - PseudoMaskedAtomicLoadUMax32>; -def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64, - PseudoMaskedAtomicLoadUMin32>; - -/// 64-bit compare and exchange - -def PseudoCmpXchg64 : PseudoCmpXchg; -defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>; - def : Pat<(int_riscv_masked_cmpxchg_i64 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 @@ -408,6 +413,7 @@ defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>; defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>; defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>; +let Predicates = [HasStdExtA, IsRV64] in defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>; let Predicates = [HasAtomicLdSt] in { diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td index 418421b2a556..fec43d814098 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoD.td @@ -33,8 +33,8 @@ def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">; // Zdinx -def GPRPF64AsFPR : AsmOperandClass { - let Name = "GPRPF64AsFPR"; +def GPRPairAsFPR : AsmOperandClass { + let Name = "GPRPairAsFPR"; let ParserMethod = "parseGPRAsFPR"; let PredicateMethod = "isGPRAsFPR"; let RenderMethod = "addRegOperands"; @@ -52,8 +52,8 @@ def FPR64INX : RegisterOperand<GPR> { let DecoderMethod = "DecodeGPRRegisterClass"; } -def FPR64IN32X : 
RegisterOperand<GPRPF64> { - let ParserMatchClass = GPRPF64AsFPR; +def FPR64IN32X : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairAsFPR; } def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>; @@ -515,15 +515,15 @@ def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>; /// Loads let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in -def PseudoRV32ZdinxLD : Pseudo<(outs GPRPF64:$dst), (ins GPR:$rs1, simm12:$imm12), []>; +def PseudoRV32ZdinxLD : Pseudo<(outs GPRPair:$dst), (ins GPR:$rs1, simm12:$imm12), []>; def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))), (PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>; /// Stores let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in -def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPF64:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>; -def : Pat<(store (f64 GPRPF64:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)), - (PseudoRV32ZdinxSD GPRPF64:$rs2, GPR:$rs1, simm12:$imm12)>; +def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>; +def : Pat<(store (f64 GPRPair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)), + (PseudoRV32ZdinxSD GPRPair:$rs2, GPR:$rs1, simm12:$imm12)>; /// Pseudo-instructions needed for the soft-float ABI with RV32D diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 4f87c36506e5..8ebd8b89c119 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -877,6 +877,23 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> { } } +multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm> { + foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2)), + (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, + vxrm, vti.AVL, vti.Log2SEW, TA_MA)>; + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat (XLenVT GPR:$rs2)))), + (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, + vxrm, vti.AVL, vti.Log2SEW, TA_MA)>; + } + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -1132,20 +1149,8 @@ defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">; defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">; // 12.2. Vector Single-Width Averaging Add and Subtract -foreach vti = AllIntegerVectors in { - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2)), - (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2, - 0b10, vti.AVL, vti.Log2SEW, TA_MA)>; - def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatPat (XLenVT GPR:$rs2)))), - (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX) - (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, - 0b10, vti.AVL, vti.Log2SEW, TA_MA)>; - } -} +defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>; +defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>; // 15. 
Vector Mask Instructions diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index d60ff4b5fab0..1deb9a709463 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -112,6 +112,7 @@ def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL> def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>; def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; +def riscv_avgceilu_vl : SDNode<"RISCVISD::AVGCEILU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>; def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>; @@ -2031,6 +2032,25 @@ multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> { } } +multiclass VPatAVGADDVL_VV_VX_RM<SDNode vop, int vxrm> { + foreach vti = AllIntegerVectors in { + let Predicates = GetVTypePredicates<vti>.Predicates in { + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector vti.RegClass:$rs2), + vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, + (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(vop (vti.Vector vti.RegClass:$rs1), + (vti.Vector (SplatPat (XLenVT GPR:$rs2))), + vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), + (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK") + vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, + (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + } + } +} + //===----------------------------------------------------------------------===// // Patterns. //===----------------------------------------------------------------------===// @@ -2308,22 +2328,8 @@ defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">; defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">; // 12.2. Vector Single-Width Averaging Add and Subtract -foreach vti = AllIntegerVectors in { - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), - (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK") - vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1), - (vti.Vector (SplatPat (XLenVT GPR:$rs2))), - vti.RegClass:$merge, (vti.Mask V0), VLOpFrag), - (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK") - vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2, - (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - } -} +defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>; +defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>; // 12.5. 
Vector Narrowing Fixed-Point Clip Instructions class VPatTruncSatClipMaxMinBase<string inst, diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td index a09f5715b24f..ffcdd0010749 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td @@ -17,15 +17,107 @@ // Zacas (Atomic Compare-and-Swap) //===----------------------------------------------------------------------===// +def GPRPairRV32Operand : AsmOperandClass { + let Name = "GPRPairRV32"; + let ParserMethod = "parseGPRPair<false>"; + let PredicateMethod = "isGPRPair"; + let RenderMethod = "addRegOperands"; +} + +def GPRPairRV64Operand : AsmOperandClass { + let Name = "GPRPairRV64"; + let ParserMethod = "parseGPRPair<true>"; + let PredicateMethod = "isGPRPair"; + let RenderMethod = "addRegOperands"; +} + +def GPRPairRV32 : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairRV32Operand; +} + +def GPRPairRV64 : RegisterOperand<GPRPair> { + let ParserMatchClass = GPRPairRV64Operand; +} + +let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb" in +class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr, + DAGOperand RC> + : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO, + (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2), + opcodestr, "$rd, $rs2, $rs1">; + +multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr, + DAGOperand RC> { + def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>; + def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>; + def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>; + def _AQ_RL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>; +} + let Predicates = [HasStdExtZacas] in { -defm AMOCAS_W : AMO_rr_aq_rl<0b00101, 0b010, "amocas.w">; -defm AMOCAS_D : AMO_rr_aq_rl<0b00101, 0b011, "amocas.d">; +defm AMOCAS_W : AMO_cas_aq_rl<0b00101, 0b010, "amocas.w", GPR>; } // Predicates = [HasStdExtZacas] +let Predicates = [HasStdExtZacas, IsRV32], DecoderNamespace = "RV32Zacas" in { +defm AMOCAS_D_RV32 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPRPairRV32>; +} // Predicates = [HasStdExtZacas, IsRV32] + let Predicates = [HasStdExtZacas, IsRV64] in { -defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">; +defm AMOCAS_D_RV64 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPR>; +defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>; } // Predicates = [HasStdExtZacas, IsRV64] +multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT, + list<Predicate> ExtraPreds = []> { + let Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) in { + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + 
(!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>; + } // Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) + let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in { + def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr), + (vt GPR:$cmp), + (vt GPR:$new)), + (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>; + } // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) +} + +defm : AMOCASPat<"atomic_cmp_swap_32", "AMOCAS_W">; +defm : AMOCASPat<"atomic_cmp_swap_64", "AMOCAS_D_RV64", i64, [IsRV64]>; + //===----------------------------------------------------------------------===// // Zawrs (Wait-on-Reservation-Set) //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td index a59d058382fe..5a4d8c4cfece 100644 --- a/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td +++ b/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVRegisterInfo.td @@ -63,7 +63,10 @@ def sub_vrm1_5 : ComposedSubRegIndex<sub_vrm2_2, sub_vrm1_1>; def sub_vrm1_6 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_0>; def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>; -def sub_32_hi : SubRegIndex<32, 32>; +// GPR sizes change with HwMode. +// FIXME: Support HwMode in SubRegIndex? +def sub_gpr_even : SubRegIndex<-1>; +def sub_gpr_odd : SubRegIndex<-1, -1>; } // Namespace = "RISCV" // Integer registers @@ -118,6 +121,8 @@ def XLenVT : ValueTypeByHwMode<[RV32, RV64], // Allow f64 in GPR for ZDINX on RV64. 
def XLenFVT : ValueTypeByHwMode<[RV64], [f64]>; +def XLenPairFVT : ValueTypeByHwMode<[RV32], + [f64]>; def XLenRI : RegInfoByHwMode< [RV32, RV64], [RegInfo<32,32,32>, RegInfo<64,64,64>]>; @@ -546,33 +551,37 @@ def DUMMY_REG_PAIR_WITH_X0 : RISCVReg<0, "0">; def GPRAll : GPRRegisterClass<(add GPR, DUMMY_REG_PAIR_WITH_X0)>; let RegAltNameIndices = [ABIRegAltName] in { - def X0_PD : RISCVRegWithSubRegs<0, X0.AsmName, - [X0, DUMMY_REG_PAIR_WITH_X0], - X0.AltNames> { - let SubRegIndices = [sub_32, sub_32_hi]; + def X0_Pair : RISCVRegWithSubRegs<0, X0.AsmName, + [X0, DUMMY_REG_PAIR_WITH_X0], + X0.AltNames> { + let SubRegIndices = [sub_gpr_even, sub_gpr_odd]; let CoveredBySubRegs = 1; } foreach I = 1-15 in { defvar Index = !shl(I, 1); + defvar IndexP1 = !add(Index, 1); defvar Reg = !cast<Register>("X"#Index); - defvar RegP1 = !cast<Register>("X"#!add(Index,1)); - def X#Index#_PD : RISCVRegWithSubRegs<Index, Reg.AsmName, - [Reg, RegP1], - Reg.AltNames> { - let SubRegIndices = [sub_32, sub_32_hi]; + defvar RegP1 = !cast<Register>("X"#IndexP1); + def "X" # Index #"_X" # IndexP1 : RISCVRegWithSubRegs<Index, + Reg.AsmName, + [Reg, RegP1], + Reg.AltNames> { + let SubRegIndices = [sub_gpr_even, sub_gpr_odd]; let CoveredBySubRegs = 1; } } } -let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in -def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add - X10_PD, X12_PD, X14_PD, X16_PD, - X6_PD, - X28_PD, X30_PD, - X8_PD, - X18_PD, X20_PD, X22_PD, X24_PD, X26_PD, - X0_PD, X2_PD, X4_PD +let RegInfos = RegInfoByHwMode<[RV32, RV64], + [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>, + DecoderMethod = "DecodeGPRPairRegisterClass" in +def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add + X10_X11, X12_X13, X14_X15, X16_X17, + X6_X7, + X28_X29, X30_X31, + X8_X9, + X18_X19, X20_X21, X22_X23, X24_X25, X26_X27, + X0_Pair, X2_X3, X4_X5 )>; // The register class is added for inline assembly for vector mask types. diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 320f91c76057..815eca1240d8 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -1649,7 +1649,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) { } } if (Node->getValueType(0) == MVT::i128) { - const APInt &Val = cast<ConstantSDNode>(Node)->getAPIntValue(); + const APInt &Val = Node->getAsAPIntVal(); SystemZVectorConstantInfo VCI(Val); if (VCI.isVectorConstantLegal(*Subtarget)) { loadVectorConstant(VCI, Node); diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 2450c6801a66..7d387c7b9f2f 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -340,6 +340,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); + // Also expand 256 bit shifts if i128 is a legal type. + if (isTypeLegal(MVT::i128)) { + setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); + setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); + } + // Handle bitcast from fp128 to i128. 
if (!isTypeLegal(MVT::i128)) setOperationAction(ISD::BITCAST, MVT::i128, Custom); diff --git a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index 15dc44a04395..7f0140a5e8c6 100644 --- a/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/contrib/llvm-project/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -839,9 +839,9 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { unsigned Reg; - if (Attrs.hasParamAttr(I, Attribute::SExt)) + if (Call->paramHasAttr(I, Attribute::SExt)) Reg = getRegForSignedValue(V); - else if (Attrs.hasParamAttr(I, Attribute::ZExt)) + else if (Call->paramHasAttr(I, Attribute::ZExt)) Reg = getRegForUnsignedValue(V); else Reg = getRegForValue(V); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 304b998e1f26..e006dd877360 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -148,21 +148,25 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::AND16ri8: case X86::AND16rm: case X86::AND16rr: + case X86::AND16rr_REV: case X86::AND32i32: case X86::AND32ri: case X86::AND32ri8: case X86::AND32rm: case X86::AND32rr: + case X86::AND32rr_REV: case X86::AND64i32: case X86::AND64ri32: case X86::AND64ri8: case X86::AND64rm: case X86::AND64rr: + case X86::AND64rr_REV: case X86::AND8i8: case X86::AND8ri: case X86::AND8ri8: case X86::AND8rm: case X86::AND8rr: + case X86::AND8rr_REV: return FirstMacroFusionInstKind::And; // CMP case X86::CMP16i16: @@ -171,24 +175,28 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::CMP16ri8: case X86::CMP16rm: case X86::CMP16rr: + case X86::CMP16rr_REV: case X86::CMP32i32: case X86::CMP32mr: case X86::CMP32ri: case X86::CMP32ri8: case X86::CMP32rm: case X86::CMP32rr: + case X86::CMP32rr_REV: case X86::CMP64i32: case X86::CMP64mr: case X86::CMP64ri32: case X86::CMP64ri8: case X86::CMP64rm: case X86::CMP64rr: + case X86::CMP64rr_REV: case X86::CMP8i8: case X86::CMP8mr: case X86::CMP8ri: case X86::CMP8ri8: case X86::CMP8rm: case X86::CMP8rr: + case X86::CMP8rr_REV: return FirstMacroFusionInstKind::Cmp; // ADD case X86::ADD16i16: @@ -196,42 +204,50 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) { case X86::ADD16ri8: case X86::ADD16rm: case X86::ADD16rr: + case X86::ADD16rr_REV: case X86::ADD32i32: case X86::ADD32ri: case X86::ADD32ri8: case X86::ADD32rm: case X86::ADD32rr: + case X86::ADD32rr_REV: case X86::ADD64i32: case X86::ADD64ri32: case X86::ADD64ri8: case X86::ADD64rm: case X86::ADD64rr: + case X86::ADD64rr_REV: case X86::ADD8i8: case X86::ADD8ri: case X86::ADD8ri8: case X86::ADD8rm: case X86::ADD8rr: + case X86::ADD8rr_REV: // SUB case X86::SUB16i16: case X86::SUB16ri: case X86::SUB16ri8: case X86::SUB16rm: case X86::SUB16rr: + case X86::SUB16rr_REV: case X86::SUB32i32: case X86::SUB32ri: case X86::SUB32ri8: case X86::SUB32rm: case X86::SUB32rr: + case X86::SUB32rr_REV: case X86::SUB64i32: case X86::SUB64ri32: case X86::SUB64ri8: case X86::SUB64rm: case X86::SUB64rr: + case X86::SUB64rr_REV: case X86::SUB8i8: case X86::SUB8ri: case X86::SUB8ri8: case X86::SUB8rm: case X86::SUB8rr: + case X86::SUB8rr_REV: return FirstMacroFusionInstKind::AddSub; // INC case X86::INC16r: diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp 
b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp index aad839b83ee1..b13bf361ab79 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp @@ -173,6 +173,7 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) { #define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \ + LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \ case X86::MNEMONIC##8ri: \ diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp index 5a28240ea9e2..700ab797b2f6 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2444,6 +2444,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, ISD::SRL, ISD::OR, ISD::AND, + ISD::BITREVERSE, ISD::ADD, ISD::FADD, ISD::FSUB, @@ -4821,8 +4822,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, APInt UndefSrcElts(NumSrcElts, 0); SmallVector<APInt, 64> SrcEltBits; - auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0)); - SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); + const APInt &C = Op.getOperand(0).getConstantOperandAPInt(0); + SrcEltBits.push_back(C.zextOrTrunc(SrcEltSizeInBits)); SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); return CastBitData(UndefSrcElts, SrcEltBits); } @@ -17223,6 +17224,7 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, "Cannot lower 512-bit vectors w/o basic ISA!"); int NumElts = Mask.size(); + int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); // Try to recognize shuffles that are just padding a subvector with zeros. int SubvecElts = 0; @@ -17288,17 +17290,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, Offset += NumElts; // Increment for next iteration. } - // If we're broadcasting a SETCC result, try to broadcast the ops instead. + // If we're performing an unary shuffle on a SETCC result, try to shuffle the + // ops instead. // TODO: What other unary shuffles would benefit from this? - if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC && - V1->hasOneUse()) { + if (NumV2Elements == 0 && V1.getOpcode() == ISD::SETCC && V1->hasOneUse()) { SDValue Op0 = V1.getOperand(0); SDValue Op1 = V1.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get(); EVT OpVT = Op0.getValueType(); - return DAG.getSetCC( - DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask), - DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC); + if (OpVT.getScalarSizeInBits() >= 32 || isBroadcastShuffleMask(Mask)) + return DAG.getSetCC( + DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask), + DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC); } MVT ExtVT; @@ -22551,7 +22554,7 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, // FIXME: Do this for non-constant compares for constant on LHS? if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) && Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub. 
- cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 && + Op1->getAsAPIntVal().getActiveBits() <= 32 && DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) { CmpVT = MVT::i32; Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0); @@ -47029,8 +47032,8 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); - APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue(); - APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue(); + APInt ShlConst = N01->getAsAPIntVal(); + APInt SarConst = N1->getAsAPIntVal(); EVT CVT = N1.getValueType(); if (SarConst.isNegative()) @@ -51835,6 +51838,33 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG, return combineFneg(N, DAG, DCI, Subtarget); } +static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget &Subtarget) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Convert a (iX bitreverse(bitcast(vXi1 X))) -> (iX bitcast(shuffle(X))) + if (VT.isInteger() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) { + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + if (SrcVT.isVector() && SrcVT.getScalarType() == MVT::i1 && + (DCI.isBeforeLegalize() || + DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) && + Subtarget.hasSSSE3()) { + unsigned NumElts = SrcVT.getVectorNumElements(); + SmallVector<int, 32> ReverseMask(NumElts); + for (unsigned I = 0; I != NumElts; ++I) + ReverseMask[I] = (NumElts - 1) - I; + SDValue Rev = + DAG.getVectorShuffle(SrcVT, SDLoc(N), Src, Src, ReverseMask); + return DAG.getBitcast(VT, Rev); + } + } + + return SDValue(); +} + static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { @@ -56124,6 +56154,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); + case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget); case X86ISD::BEXTR: case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget); case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td index 5cfa95e085e3..76b0fe5f5cad 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrArithmetic.td @@ -1107,43 +1107,85 @@ def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS), // Patterns for basic arithmetic ops with relocImm for the immediate field. 
multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> { - def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), - (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; - def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), - (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; - def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), - (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; - def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), - (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; - - def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; - def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst), - (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + let Predicates = [NoNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), + (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), + (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), + (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), + (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; + def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst), + (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + } + let Predicates = [HasNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2), + (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2), + (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2), + (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2), + (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(OpNode (load addr:$dst), relocImm8_su:$src), + (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>; + def : Pat<(OpNode (load addr:$dst), relocImm16_su:$src), + (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>; + def : Pat<(OpNode (load addr:$dst), relocImm32_su:$src), + (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>; + def : Pat<(OpNode (load addr:$dst), i64relocImmSExt32_su:$src), + (!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>; + } } multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> { - def : Pat<(OpNodeFlag 
GR8:$src1, relocImm8_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; - def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; - def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; - def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), - (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; - - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; - def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst), - (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + let Predicates = [NoNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>; + def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst), + (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>; + } + let Predicates = [HasNDD] in { + def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>; + def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>; + def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>; + def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS), + (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>; + + def : Pat<(OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), + (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>; + def : Pat<(OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), + 
(!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>; + } } multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> { diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td index c77c77ee4a3e..422391a6e02a 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrCompiler.td @@ -1550,13 +1550,24 @@ def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000), // AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32. let AddedComplexity = 1 in { -def : Pat<(and GR64:$src, i64immZExt32:$imm), - (SUBREG_TO_REG - (i64 0), - (AND32ri - (EXTRACT_SUBREG GR64:$src, sub_32bit), - (i32 (GetLo32XForm imm:$imm))), - sub_32bit)>; + let Predicates = [NoNDD] in { + def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo32XForm imm:$imm))), + sub_32bit)>; + } + let Predicates = [HasNDD] in { + def : Pat<(and GR64:$src, i64immZExt32:$imm), + (SUBREG_TO_REG + (i64 0), + (AND32ri_ND + (EXTRACT_SUBREG GR64:$src, sub_32bit), + (i32 (GetLo32XForm imm:$imm))), + sub_32bit)>; + } } // AddedComplexity = 1 @@ -1762,10 +1773,18 @@ def : Pat<(X86xor_flag (i8 (trunc GR32:$src)), // where the least significant bit is not 0. However, the probability of this // happening is considered low enough that this is officially not a // "real problem". -def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; -def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; -def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; -def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; +let Predicates = [NoNDD] in { + def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>; + def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>; + def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>; + def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>; +} +let Predicates = [HasNDD] in { + def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr_ND GR8 :$src1, GR8 :$src1)>; + def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr_ND GR16:$src1, GR16:$src1)>; + def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr_ND GR32:$src1, GR32:$src1)>; + def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr_ND GR64:$src1, GR64:$src1)>; +} // Shift amount is implicitly masked. 
multiclass MaskedShiftAmountPats<SDNode frag, string name> { @@ -1937,75 +1956,179 @@ defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>; // EFLAGS-defining Patterns //===----------------------------------------------------------------------===// -// add reg, reg -def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>; -def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>; -def : Pat<(add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>; - -// add reg, mem -def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), - (ADD8rm GR8:$src1, addr:$src2)>; -def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), - (ADD16rm GR16:$src1, addr:$src2)>; -def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), - (ADD32rm GR32:$src1, addr:$src2)>; -def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), - (ADD64rm GR64:$src1, addr:$src2)>; - -// add reg, imm -def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>; -def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>; -def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>; -def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// sub reg, reg -def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>; -def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>; -def : Pat<(sub GR64:$src1, GR64:$src2), (SUB64rr GR64:$src1, GR64:$src2)>; - -// sub reg, mem -def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), - (SUB8rm GR8:$src1, addr:$src2)>; -def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), - (SUB16rm GR16:$src1, addr:$src2)>; -def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), - (SUB32rm GR32:$src1, addr:$src2)>; -def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), - (SUB64rm GR64:$src1, addr:$src2)>; - -// sub reg, imm -def : Pat<(sub GR8:$src1, imm:$src2), - (SUB8ri GR8:$src1, imm:$src2)>; -def : Pat<(sub GR16:$src1, imm:$src2), - (SUB16ri GR16:$src1, imm:$src2)>; -def : Pat<(sub GR32:$src1, imm:$src2), - (SUB32ri GR32:$src1, imm:$src2)>; -def : Pat<(sub GR64:$src1, i64immSExt32:$src2), - (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// sub 0, reg -def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>; -def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>; -def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>; -def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>; - -// mul reg, reg -def : Pat<(mul GR16:$src1, GR16:$src2), - (IMUL16rr GR16:$src1, GR16:$src2)>; -def : Pat<(mul GR32:$src1, GR32:$src2), - (IMUL32rr GR32:$src1, GR32:$src2)>; -def : Pat<(mul GR64:$src1, GR64:$src2), - (IMUL64rr GR64:$src1, GR64:$src2)>; - -// mul reg, mem -def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), - (IMUL16rm GR16:$src1, addr:$src2)>; -def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), - (IMUL32rm GR32:$src1, addr:$src2)>; -def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), - (IMUL64rm GR64:$src1, addr:$src2)>; +multiclass EFLAGSDefiningPats<string suffix, Predicate p> { + let Predicates = [p] in { + // add reg, reg + def : Pat<(add GR8 :$src1, GR8 :$src2), (!cast<Instruction>(ADD8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(add GR16:$src1, GR16:$src2), (!cast<Instruction>(ADD16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(add GR32:$src1, GR32:$src2), (!cast<Instruction>(ADD32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(add 
GR64:$src1, GR64:$src2), (!cast<Instruction>(ADD64rr#suffix) GR64:$src1, GR64:$src2)>; + + // add reg, mem + def : Pat<(add GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(ADD8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(add GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(ADD16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(add GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(ADD32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(add GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(ADD64rm#suffix) GR64:$src1, addr:$src2)>; + + // add reg, imm + def : Pat<(add GR8 :$src1, imm:$src2), (!cast<Instruction>(ADD8ri#suffix) GR8:$src1 , imm:$src2)>; + def : Pat<(add GR16:$src1, imm:$src2), (!cast<Instruction>(ADD16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(add GR32:$src1, imm:$src2), (!cast<Instruction>(ADD32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(add GR64:$src1, i64immSExt32:$src2), (!cast<Instruction>(ADD64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // sub reg, reg + def : Pat<(sub GR8 :$src1, GR8 :$src2), (!cast<Instruction>(SUB8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(sub GR16:$src1, GR16:$src2), (!cast<Instruction>(SUB16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(sub GR32:$src1, GR32:$src2), (!cast<Instruction>(SUB32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(sub GR64:$src1, GR64:$src2), (!cast<Instruction>(SUB64rr#suffix) GR64:$src1, GR64:$src2)>; + + // sub reg, mem + def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(SUB8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(SUB16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(SUB32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(SUB64rm#suffix) GR64:$src1, addr:$src2)>; + + // sub reg, imm + def : Pat<(sub GR8:$src1, imm:$src2), + (!cast<Instruction>(SUB8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(sub GR16:$src1, imm:$src2), + (!cast<Instruction>(SUB16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(sub GR32:$src1, imm:$src2), + (!cast<Instruction>(SUB32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(sub GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(SUB64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // sub 0, reg + def : Pat<(X86sub_flag 0, GR8 :$src), (!cast<Instruction>(NEG8r#suffix) GR8 :$src)>; + def : Pat<(X86sub_flag 0, GR16:$src), (!cast<Instruction>(NEG16r#suffix) GR16:$src)>; + def : Pat<(X86sub_flag 0, GR32:$src), (!cast<Instruction>(NEG32r#suffix) GR32:$src)>; + def : Pat<(X86sub_flag 0, GR64:$src), (!cast<Instruction>(NEG64r#suffix) GR64:$src)>; + + // mul reg, reg + def : Pat<(mul GR16:$src1, GR16:$src2), + (!cast<Instruction>(IMUL16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(mul GR32:$src1, GR32:$src2), + (!cast<Instruction>(IMUL32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(mul GR64:$src1, GR64:$src2), + (!cast<Instruction>(IMUL64rr#suffix) GR64:$src1, GR64:$src2)>; + + // mul reg, mem + def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(IMUL16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(IMUL32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(IMUL64rm#suffix) GR64:$src1, addr:$src2)>; + + // or reg/reg. 
+ def : Pat<(or GR8 :$src1, GR8 :$src2), (!cast<Instruction>(OR8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(or GR16:$src1, GR16:$src2), (!cast<Instruction>(OR16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(or GR32:$src1, GR32:$src2), (!cast<Instruction>(OR32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(or GR64:$src1, GR64:$src2), (!cast<Instruction>(OR64rr#suffix) GR64:$src1, GR64:$src2)>; + + // or reg/mem + def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(OR8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(OR16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(OR32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(OR64rm#suffix) GR64:$src1, addr:$src2)>; + + // or reg/imm + def : Pat<(or GR8:$src1 , imm:$src2), (!cast<Instruction>(OR8ri#suffix) GR8 :$src1, imm:$src2)>; + def : Pat<(or GR16:$src1, imm:$src2), (!cast<Instruction>(OR16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(or GR32:$src1, imm:$src2), (!cast<Instruction>(OR32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(or GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(OR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // xor reg/reg + def : Pat<(xor GR8 :$src1, GR8 :$src2), (!cast<Instruction>(XOR8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(xor GR16:$src1, GR16:$src2), (!cast<Instruction>(XOR16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(xor GR32:$src1, GR32:$src2), (!cast<Instruction>(XOR32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(xor GR64:$src1, GR64:$src2), (!cast<Instruction>(XOR64rr#suffix) GR64:$src1, GR64:$src2)>; + + // xor reg/mem + def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(XOR8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(XOR16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(XOR32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), + (!cast<Instruction>(XOR64rm#suffix) GR64:$src1, addr:$src2)>; + + // xor reg/imm + def : Pat<(xor GR8:$src1, imm:$src2), + (!cast<Instruction>(XOR8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(xor GR16:$src1, imm:$src2), + (!cast<Instruction>(XOR16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(xor GR32:$src1, imm:$src2), + (!cast<Instruction>(XOR32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(xor GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(XOR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + + // and reg/reg + def : Pat<(and GR8 :$src1, GR8 :$src2), (!cast<Instruction>(AND8rr#suffix) GR8 :$src1, GR8 :$src2)>; + def : Pat<(and GR16:$src1, GR16:$src2), (!cast<Instruction>(AND16rr#suffix) GR16:$src1, GR16:$src2)>; + def : Pat<(and GR32:$src1, GR32:$src2), (!cast<Instruction>(AND32rr#suffix) GR32:$src1, GR32:$src2)>; + def : Pat<(and GR64:$src1, GR64:$src2), (!cast<Instruction>(AND64rr#suffix) GR64:$src1, GR64:$src2)>; + + // and reg/mem + def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), + (!cast<Instruction>(AND8rm#suffix) GR8:$src1, addr:$src2)>; + def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), + (!cast<Instruction>(AND16rm#suffix) GR16:$src1, addr:$src2)>; + def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), + (!cast<Instruction>(AND32rm#suffix) GR32:$src1, addr:$src2)>; + def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), + 
(!cast<Instruction>(AND64rm#suffix) GR64:$src1, addr:$src2)>; + + // and reg/imm + def : Pat<(and GR8:$src1, imm:$src2), + (!cast<Instruction>(AND8ri#suffix) GR8:$src1, imm:$src2)>; + def : Pat<(and GR16:$src1, imm:$src2), + (!cast<Instruction>(AND16ri#suffix) GR16:$src1, imm:$src2)>; + def : Pat<(and GR32:$src1, imm:$src2), + (!cast<Instruction>(AND32ri#suffix) GR32:$src1, imm:$src2)>; + def : Pat<(and GR64:$src1, i64immSExt32:$src2), + (!cast<Instruction>(AND64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>; + } + + // Increment/Decrement reg. + // Do not make INC/DEC if it is slow + let Predicates = [UseIncDec, p] in { + def : Pat<(add GR8:$src, 1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>; + def : Pat<(add GR16:$src, 1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>; + def : Pat<(add GR32:$src, 1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>; + def : Pat<(add GR64:$src, 1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>; + def : Pat<(add GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>; + def : Pat<(add GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>; + def : Pat<(add GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>; + def : Pat<(add GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>; + + def : Pat<(X86add_flag_nocf GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>; + def : Pat<(X86add_flag_nocf GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>; + def : Pat<(X86add_flag_nocf GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>; + def : Pat<(X86add_flag_nocf GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>; + def : Pat<(X86sub_flag_nocf GR8:$src, -1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>; + def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>; + def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>; + def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>; + } +} + +defm : EFLAGSDefiningPats<"", NoNDD>; +defm : EFLAGSDefiningPats<"_ND", HasNDD>; // mul reg, imm def : Pat<(mul GR16:$src1, imm:$src2), @@ -2023,103 +2146,6 @@ def : Pat<(mul (loadi32 addr:$src1), imm:$src2), def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; -// Increment/Decrement reg. -// Do not make INC/DEC if it is slow -let Predicates = [UseIncDec] in { - def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>; - def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>; - def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>; - def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; - def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>; - def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>; - def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>; - def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; - - def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>; - def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>; - def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>; - def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>; - def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>; - def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>; - def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>; - def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>; -} - -// or reg/reg. 
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>; -def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>; - -// or reg/mem -def : Pat<(or GR8:$src1, (loadi8 addr:$src2)), - (OR8rm GR8:$src1, addr:$src2)>; -def : Pat<(or GR16:$src1, (loadi16 addr:$src2)), - (OR16rm GR16:$src1, addr:$src2)>; -def : Pat<(or GR32:$src1, (loadi32 addr:$src2)), - (OR32rm GR32:$src1, addr:$src2)>; -def : Pat<(or GR64:$src1, (loadi64 addr:$src2)), - (OR64rm GR64:$src1, addr:$src2)>; - -// or reg/imm -def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>; -def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>; -def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>; -def : Pat<(or GR64:$src1, i64immSExt32:$src2), - (OR64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// xor reg/reg -def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>; -def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>; -def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>; - -// xor reg/mem -def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)), - (XOR8rm GR8:$src1, addr:$src2)>; -def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)), - (XOR16rm GR16:$src1, addr:$src2)>; -def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)), - (XOR32rm GR32:$src1, addr:$src2)>; -def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)), - (XOR64rm GR64:$src1, addr:$src2)>; - -// xor reg/imm -def : Pat<(xor GR8:$src1, imm:$src2), - (XOR8ri GR8:$src1, imm:$src2)>; -def : Pat<(xor GR16:$src1, imm:$src2), - (XOR16ri GR16:$src1, imm:$src2)>; -def : Pat<(xor GR32:$src1, imm:$src2), - (XOR32ri GR32:$src1, imm:$src2)>; -def : Pat<(xor GR64:$src1, i64immSExt32:$src2), - (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>; - -// and reg/reg -def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>; -def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>; -def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>; -def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>; - -// and reg/mem -def : Pat<(and GR8:$src1, (loadi8 addr:$src2)), - (AND8rm GR8:$src1, addr:$src2)>; -def : Pat<(and GR16:$src1, (loadi16 addr:$src2)), - (AND16rm GR16:$src1, addr:$src2)>; -def : Pat<(and GR32:$src1, (loadi32 addr:$src2)), - (AND32rm GR32:$src1, addr:$src2)>; -def : Pat<(and GR64:$src1, (loadi64 addr:$src2)), - (AND64rm GR64:$src1, addr:$src2)>; - -// and reg/imm -def : Pat<(and GR8:$src1, imm:$src2), - (AND8ri GR8:$src1, imm:$src2)>; -def : Pat<(and GR16:$src1, imm:$src2), - (AND16ri GR16:$src1, imm:$src2)>; -def : Pat<(and GR32:$src1, imm:$src2), - (AND32ri GR32:$src1, imm:$src2)>; -def : Pat<(and GR64:$src1, i64immSExt32:$src2), - (AND64ri32 GR64:$src1, i64immSExt32:$src2)>; - // Bit scan instruction patterns to match explicit zero-undef behavior. 
def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>; def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td index 97c625a64cfc..753cf62392a1 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86InstrMisc.td @@ -1523,28 +1523,28 @@ def MOVDIR64B64_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$ // ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity // let SchedRW = [WriteStore], Defs = [EFLAGS] in { - def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>, T8, XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; - def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>, T8, XD, AdSize32, Requires<[HasENQCMD]>; - def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src), "enqcmd\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>, T8, XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>; - def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src), + def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>, T8, XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>; - def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src), + def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>, T8, XS, AdSize32, Requires<[HasENQCMD]>; - def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src), + def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src), "enqcmds\t{$src, $dst|$dst, $src}", [(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>, T8, XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>; diff --git a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td index 49ef6efc6aec..48d689549709 100644 --- a/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td +++ b/contrib/llvm-project/llvm/lib/Target/X86/X86PfmCounters.td @@ -18,6 +18,10 @@ def DefaultPfmCounters : ProcPfmCounters {} def : PfmCountersDefaultBinding<DefaultPfmCounters>; // Intel X86 Counters. 
+defvar DefaultIntelPfmValidationCounters = [ + PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED"> +]; + def PentiumPfmCounters : ProcPfmCounters { let CycleCounter = PfmCounter<"cpu_clk_unhalted">; let UopsCounter = PfmCounter<"uops_retired">; @@ -100,6 +104,7 @@ def SandyBridgePfmCounters : ProcPfmCounters { PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">, PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>; def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>; @@ -117,6 +122,7 @@ def HaswellPfmCounters : ProcPfmCounters { PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">, PfmIssueCounter<"HWPort7", "uops_executed_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"haswell", HaswellPfmCounters>; @@ -133,6 +139,7 @@ def BroadwellPfmCounters : ProcPfmCounters { PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">, PfmIssueCounter<"BWPort7", "uops_executed_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>; @@ -149,6 +156,7 @@ def SkylakeClientPfmCounters : ProcPfmCounters { PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>; @@ -165,6 +173,7 @@ def SkylakeServerPfmCounters : ProcPfmCounters { PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>; def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>; @@ -182,6 +191,7 @@ def IceLakePfmCounters : ProcPfmCounters { PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">, PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8"> ]; + let ValidationCounters = DefaultIntelPfmValidationCounters; } def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>; def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>; @@ -189,6 +199,10 @@ def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>; def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>; // AMD X86 Counters. +defvar DefaultAMDPfmValidationCounters = [ + PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS"> +]; + // Set basic counters for AMD cpus that we know libpfm4 supports. 
def DefaultAMDPfmCounters : ProcPfmCounters { let CycleCounter = PfmCounter<"cpu_clk_unhalted">; @@ -265,6 +279,7 @@ def ZnVer1PfmCounters : ProcPfmCounters { PfmIssueCounter<"ZnAGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">, PfmIssueCounter<"ZnDivider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>; @@ -275,6 +290,7 @@ def ZnVer2PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn2AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">, PfmIssueCounter<"Zn2Divider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>; @@ -288,6 +304,7 @@ def ZnVer3PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn3Store", "ls_dispatch:store_dispatch">, PfmIssueCounter<"Zn3Divider", "div_op_count"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>; @@ -302,5 +319,6 @@ def ZnVer4PfmCounters : ProcPfmCounters { PfmIssueCounter<"Zn4Divider", "div_op_count">, PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch"> ]; + let ValidationCounters = DefaultAMDPfmValidationCounters; } def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
