diff options
Diffstat (limited to 'llvm/lib/Target/AArch64')
11 files changed, 137 insertions, 73 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 90e1ce9ddf66..7d2ff146a340 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -256,6 +256,11 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { if (BTE->getZExtValue()) Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI; + if (const auto *GCS = mdconst::extract_or_null<ConstantInt>( + M.getModuleFlag("guarded-control-stack"))) + if (GCS->getZExtValue()) + Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_GCS; + if (const auto *Sign = mdconst::extract_or_null<ConstantInt>( M.getModuleFlag("sign-return-address"))) if (Sign->getZExtValue()) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index edc8cc7d4d1e..ea5679b4d5e3 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -6834,10 +6834,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { return getPackedVectorTypeFromPredicateType( Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4); case Intrinsic::aarch64_sve_ld1udq: - case Intrinsic::aarch64_sve_st1udq: + case Intrinsic::aarch64_sve_st1dq: return EVT(MVT::nxv1i64); case Intrinsic::aarch64_sve_ld1uwq: - case Intrinsic::aarch64_sve_st1uwq: + case Intrinsic::aarch64_sve_st1wq: return EVT(MVT::nxv1i32); } } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 47e665176e8b..e2d07a096496 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4513,8 +4513,7 @@ static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) { SDLoc dl(N); SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i != NumElts; ++i) { - ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i)); - const APInt &CInt = C->getAPIntValue(); + const APInt &CInt = N.getConstantOperandAPInt(i); // Element types smaller than 32 bits are not legal, so use i32 elements. // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32)); diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 1cfbf4737a6f..42b7a6418032 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4214,6 +4214,9 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) { switch (FirstOpc) { default: return false; + case AArch64::LDRQui: + case AArch64::LDURQi: + return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi; case AArch64::LDRWui: case AArch64::LDURWi: return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi; diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index b435b3ce03e7..e90b8a8ca7ac 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -1326,10 +1326,14 @@ static int alignTo(int Num, int PowOf2) { static bool mayAlias(MachineInstr &MIa, SmallVectorImpl<MachineInstr *> &MemInsns, AliasAnalysis *AA) { - for (MachineInstr *MIb : MemInsns) - if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) + for (MachineInstr *MIb : MemInsns) { + if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) { + LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump()); return true; + } + } + LLVM_DEBUG(dbgs() << "No aliases found\n"); return false; } @@ -1757,9 +1761,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // Remember any instructions that read/write memory between FirstMI and MI. SmallVector<MachineInstr *, 4> MemInsns; + LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump()); for (unsigned Count = 0; MBBI != E && Count < Limit; MBBI = next_nodbg(MBBI, E)) { MachineInstr &MI = *MBBI; + LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump()); UsedInBetween.accumulate(MI); @@ -1859,6 +1865,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, " + << "keep looking.\n"); continue; } // If the alignment requirements of the paired (scaled) instruction @@ -1868,6 +1876,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() + << "Offset doesn't fit due to alignment requirements, " + << "keep looking.\n"); continue; } } @@ -1884,14 +1895,22 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq( Reg, getLdStRegOp(MI).getReg()); - // If the Rt of the second instruction was not modified or used between - // the two instructions and none of the instructions between the second - // and first alias with the second, we can combine the second into the - // first. - if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) && - !(MI.mayLoad() && !SameLoadReg && - !UsedRegUnits.available(getLdStRegOp(MI).getReg())) && - !mayAlias(MI, MemInsns, AA)) { + // If the Rt of the second instruction (destination register of the + // load) was not modified or used between the two instructions and none + // of the instructions between the second and first alias with the + // second, we can combine the second into the first. + bool RtNotModified = + ModifiedRegUnits.available(getLdStRegOp(MI).getReg()); + bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg && + !UsedRegUnits.available(getLdStRegOp(MI).getReg())); + + LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n" + << "Reg '" << getLdStRegOp(MI) << "' not modified: " + << (RtNotModified ? "true" : "false") << "\n" + << "Reg '" << getLdStRegOp(MI) << "' not used: " + << (RtNotUsed ? "true" : "false") << "\n"); + + if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) { // For pairs loading into the same reg, try to find a renaming // opportunity to allow the renaming of Reg between FirstMI and MI // and combine MI into FirstMI; otherwise bail and keep looking. @@ -1904,6 +1923,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); MemInsns.push_back(&MI); + LLVM_DEBUG(dbgs() << "Can't find reg for renaming, " + << "keep looking.\n"); continue; } Flags.setRenameReg(*RenameReg); @@ -1919,10 +1940,15 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // between the two instructions and none of the instructions between the // first and the second alias with the first, we can combine the first // into the second. - if (!(MayLoad && - !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) && - !mayAlias(FirstMI, MemInsns, AA)) { + RtNotModified = !( + MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())); + + LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n" + << "Reg '" << getLdStRegOp(FirstMI) + << "' not modified: " + << (RtNotModified ? "true" : "false") << "\n"); + if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) { if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) { Flags.setMergeForward(true); Flags.clearRenameReg(); @@ -1938,8 +1964,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, MBBIWithRenameReg = MBBI; } } - // Unable to combine these instructions due to interference in between. - // Keep looking. + LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to " + << "interference in between, keep looking.\n"); } } @@ -1948,16 +1974,20 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the instruction wasn't a matching load or store. Stop searching if we // encounter a call instruction that might modify memory. - if (MI.isCall()) + if (MI.isCall()) { + LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n"); return E; + } // Update modified / uses register units. LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); // Otherwise, if the base register is modified, we have no match, so // return early. - if (!ModifiedRegUnits.available(BaseReg)) + if (!ModifiedRegUnits.available(BaseReg)) { + LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n"); return E; + } // Update list of instructions that read/write memory. if (MI.mayLoadOrStore()) diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp index 6fcd9c290e9c..6c6cd120b035 100644 --- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp @@ -53,7 +53,7 @@ using namespace PatternMatch; #define DEBUG_TYPE "aarch64-loop-idiom-transform" static cl::opt<bool> - DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(true), + DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false), cl::desc("Disable AArch64 Loop Idiom Transform Pass.")); static cl::opt<bool> DisableByteCmp( diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index ee10a7d1c706..4782ad076c60 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1397,17 +1397,17 @@ let Predicates = [HasSVEorSME] in { (RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>; } - // ld1quw/st1quw + // ld1quw/st1qw defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>; defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>; - defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; - defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; + defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; + defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>; - // ld1qud/st1qud + // ld1qud/st1qd defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>; defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>; - defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; - defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; + defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; + defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>; } // End HasSVEorSME @@ -4006,7 +4006,9 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>; defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>; defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>; defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>; +} // End HasSVE2p1_or_HasSME2 +let Predicates = [HasSVEorSME] in { // Aliases for existing SVE instructions for which predicate-as-counter are // accepted as an operand to the instruction @@ -4025,7 +4027,7 @@ def : InstAlias<"mov $Pd, $Pn", def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>; -} // End HasSVE2p1_or_HasSME2 +} //===----------------------------------------------------------------------===// // Non-widening BFloat16 to BFloat16 instructions @@ -4095,7 +4097,7 @@ defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>; defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>; defm DUPQ_ZZI : sve2p1_dupq<"dupq">; -defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>; +defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>; defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>; defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>; diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b5b8b6829178..13b5e578391d 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II, return &II; } +// Simplify operations where predicate has all inactive lanes or try to replace +// with _u form when all lanes are active +static std::optional<Instruction *> +instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II, + Intrinsic::ID IID) { + if (match(II.getOperand(0), m_ZeroInt())) { + // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are + // inactive for sv[func]_m + return IC.replaceInstUsesWith(II, II.getOperand(1)); + } + return instCombineSVEAllActive(II, IID); +} + static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u)) return II_U; if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul, Intrinsic::aarch64_sve_mla>( @@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC, static std::optional<Instruction *> instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u)) return II_U; if (auto FMLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul, @@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) { static std::optional<Instruction *> instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u)) return II_U; if (auto FMLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul, @@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) { static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC, IntrinsicInst &II) { - if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u)) + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u)) return II_U; if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul, Intrinsic::aarch64_sve_mls>( @@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC, auto *OpMultiplicand = II.getOperand(1); auto *OpMultiplier = II.getOperand(2); - // Canonicalise a non _u intrinsic only. - if (II.getIntrinsicID() != IID) - if (auto II_U = instCombineSVEAllActive(II, IID)) - return II_U; - // Return true if a given instruction is a unit splat value, false otherwise. auto IsUnitSplat = [](auto *I) { auto *SplatValue = getSplatValue(I); @@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, case Intrinsic::aarch64_sve_ptest_last: return instCombineSVEPTest(IC, II); case Intrinsic::aarch64_sve_fabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u); case Intrinsic::aarch64_sve_fadd: return instCombineSVEVectorFAdd(IC, II); case Intrinsic::aarch64_sve_fadd_u: return instCombineSVEVectorFAddU(IC, II); case Intrinsic::aarch64_sve_fdiv: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u); case Intrinsic::aarch64_sve_fmax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u); case Intrinsic::aarch64_sve_fmaxnm: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u); case Intrinsic::aarch64_sve_fmin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u); case Intrinsic::aarch64_sve_fminnm: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u); case Intrinsic::aarch64_sve_fmla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u); case Intrinsic::aarch64_sve_fmls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u); case Intrinsic::aarch64_sve_fmul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u); case Intrinsic::aarch64_sve_fmulx: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u); case Intrinsic::aarch64_sve_fnmla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u); case Intrinsic::aarch64_sve_fnmls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u); case Intrinsic::aarch64_sve_fsub: return instCombineSVEVectorFSub(IC, II); case Intrinsic::aarch64_sve_fsub_u: @@ -1930,20 +1946,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, Intrinsic::aarch64_sve_mla_u>( IC, II, true); case Intrinsic::aarch64_sve_mla: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u); case Intrinsic::aarch64_sve_mls: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u); case Intrinsic::aarch64_sve_mul: + if (auto II_U = + instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u)) + return II_U; + return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_mul_u: return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u); case Intrinsic::aarch64_sve_sabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u); case Intrinsic::aarch64_sve_smax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u); case Intrinsic::aarch64_sve_smin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u); case Intrinsic::aarch64_sve_smulh: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u); case Intrinsic::aarch64_sve_sub: return instCombineSVEVectorSub(IC, II); case Intrinsic::aarch64_sve_sub_u: @@ -1951,31 +1971,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, Intrinsic::aarch64_sve_mls_u>( IC, II, true); case Intrinsic::aarch64_sve_uabd: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u); case Intrinsic::aarch64_sve_umax: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u); case Intrinsic::aarch64_sve_umin: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u); case Intrinsic::aarch64_sve_umulh: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u); case Intrinsic::aarch64_sve_asr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u); case Intrinsic::aarch64_sve_lsl: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u); case Intrinsic::aarch64_sve_lsr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u); case Intrinsic::aarch64_sve_and: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u); case Intrinsic::aarch64_sve_bic: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u); case Intrinsic::aarch64_sve_eor: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u); case Intrinsic::aarch64_sve_orr: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u); case Intrinsic::aarch64_sve_sqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u); case Intrinsic::aarch64_sve_uqsub: - return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u); + return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u); case Intrinsic::aarch64_sve_tbl: return instCombineSVETBL(IC, II); case Intrinsic::aarch64_sve_uunpkhi: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index b657a0954d78..302116447efc 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1166,7 +1166,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) getActionDefinitionsBuilder(G_FMAD).lower(); // Access to floating-point environment. - getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE}) + getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV, + G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE}) .libcall(); getActionDefinitionsBuilder(G_IS_FPCLASS).lower(); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp index 496ab18e9b19..6e074b6a63c4 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp @@ -120,7 +120,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, assert((!Target.getSymA() || Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None || - Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT) && + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT || + Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) && "Should only be expression-level modifiers here"); assert((!Target.getSymB() || @@ -206,7 +207,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx, case FK_Data_2: return R_CLS(ABS16); case FK_Data_4: - return R_CLS(ABS32); + return (!IsILP32 && + Target.getAccessVariant() == MCSymbolRefExpr::VK_GOTPCREL) + ? ELF::R_AARCH64_GOTPCREL32 + : R_CLS(ABS32); case FK_Data_8: if (IsILP32) { Ctx.reportError(Fixup.getLoc(), diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td index 70f3c2c99f0f..44d9a8ac7cb6 100644 --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> { } class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty> - : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd), + : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm), asm, "\t$Zd, $Zn, $Zm", "", []>, Sched<[]> { bits<5> Zm; |
