aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AArch64
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AArch64')
-rw-r--r--llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp5
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp4
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp3
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp64
-rw-r--r--llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td18
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp98
-rw-r--r--llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp3
-rw-r--r--llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp8
-rw-r--r--llvm/lib/Target/AArch64/SMEInstrFormats.td2
11 files changed, 137 insertions, 73 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 90e1ce9ddf66..7d2ff146a340 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -256,6 +256,11 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
if (BTE->getZExtValue())
Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
+ if (const auto *GCS = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("guarded-control-stack")))
+ if (GCS->getZExtValue())
+ Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+
if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("sign-return-address")))
if (Sign->getZExtValue())
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index edc8cc7d4d1e..ea5679b4d5e3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -6834,10 +6834,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
case Intrinsic::aarch64_sve_ld1udq:
- case Intrinsic::aarch64_sve_st1udq:
+ case Intrinsic::aarch64_sve_st1dq:
return EVT(MVT::nxv1i64);
case Intrinsic::aarch64_sve_ld1uwq:
- case Intrinsic::aarch64_sve_st1uwq:
+ case Intrinsic::aarch64_sve_st1wq:
return EVT(MVT::nxv1i32);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 47e665176e8b..e2d07a096496 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4513,8 +4513,7 @@ static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) {
SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
- ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i));
- const APInt &CInt = C->getAPIntValue();
+ const APInt &CInt = N.getConstantOperandAPInt(i);
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 1cfbf4737a6f..42b7a6418032 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4214,6 +4214,9 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
switch (FirstOpc) {
default:
return false;
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
case AArch64::LDRWui:
case AArch64::LDURWi:
return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index b435b3ce03e7..e90b8a8ca7ac 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1326,10 +1326,14 @@ static int alignTo(int Num, int PowOf2) {
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
AliasAnalysis *AA) {
- for (MachineInstr *MIb : MemInsns)
- if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
+ for (MachineInstr *MIb : MemInsns) {
+ if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
+ LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
return true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "No aliases found\n");
return false;
}
@@ -1757,9 +1761,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
+ LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
for (unsigned Count = 0; MBBI != E && Count < Limit;
MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
+ LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
UsedInBetween.accumulate(MI);
@@ -1859,6 +1865,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
+ << "keep looking.\n");
continue;
}
// If the alignment requirements of the paired (scaled) instruction
@@ -1868,6 +1876,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs()
+ << "Offset doesn't fit due to alignment requirements, "
+ << "keep looking.\n");
continue;
}
}
@@ -1884,14 +1895,22 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
Reg, getLdStRegOp(MI).getReg());
- // If the Rt of the second instruction was not modified or used between
- // the two instructions and none of the instructions between the second
- // and first alias with the second, we can combine the second into the
- // first.
- if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
- !(MI.mayLoad() && !SameLoadReg &&
- !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
- !mayAlias(MI, MemInsns, AA)) {
+ // If the Rt of the second instruction (destination register of the
+ // load) was not modified or used between the two instructions and none
+ // of the instructions between the second and first alias with the
+ // second, we can combine the second into the first.
+ bool RtNotModified =
+ ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
+ bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
+ !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
+
+ LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
+ << "Reg '" << getLdStRegOp(MI) << "' not modified: "
+ << (RtNotModified ? "true" : "false") << "\n"
+ << "Reg '" << getLdStRegOp(MI) << "' not used: "
+ << (RtNotUsed ? "true" : "false") << "\n");
+
+ if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
// For pairs loading into the same reg, try to find a renaming
// opportunity to allow the renaming of Reg between FirstMI and MI
// and combine MI into FirstMI; otherwise bail and keep looking.
@@ -1904,6 +1923,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
+ << "keep looking.\n");
continue;
}
Flags.setRenameReg(*RenameReg);
@@ -1919,10 +1940,15 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (!(MayLoad &&
- !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
- !mayAlias(FirstMI, MemInsns, AA)) {
+ RtNotModified = !(
+ MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
+
+ LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
+ << "Reg '" << getLdStRegOp(FirstMI)
+ << "' not modified: "
+ << (RtNotModified ? "true" : "false") << "\n");
+ if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
Flags.setMergeForward(true);
Flags.clearRenameReg();
@@ -1938,8 +1964,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MBBIWithRenameReg = MBBI;
}
}
- // Unable to combine these instructions due to interference in between.
- // Keep looking.
+ LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
+ << "interference in between, keep looking.\n");
}
}
@@ -1948,16 +1974,20 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// If the instruction wasn't a matching load or store. Stop searching if we
// encounter a call instruction that might modify memory.
- if (MI.isCall())
+ if (MI.isCall()) {
+ LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
return E;
+ }
// Update modified / uses register units.
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
// Otherwise, if the base register is modified, we have no match, so
// return early.
- if (!ModifiedRegUnits.available(BaseReg))
+ if (!ModifiedRegUnits.available(BaseReg)) {
+ LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
return E;
+ }
// Update list of instructions that read/write memory.
if (MI.mayLoadOrStore())
diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
index 6fcd9c290e9c..6c6cd120b035 100644
--- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
@@ -53,7 +53,7 @@ using namespace PatternMatch;
#define DEBUG_TYPE "aarch64-loop-idiom-transform"
static cl::opt<bool>
- DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(true),
+ DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
static cl::opt<bool> DisableByteCmp(
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index ee10a7d1c706..4782ad076c60 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1397,17 +1397,17 @@ let Predicates = [HasSVEorSME] in {
(RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
}
- // ld1quw/st1quw
+ // ld1quw/st1qw
defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
- defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
- defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
- // ld1qud/st1qud
+ // ld1qud/st1qd
defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
- defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
- defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
} // End HasSVEorSME
@@ -4006,7 +4006,9 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>;
defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>;
defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>;
defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>;
+} // End HasSVE2p1_or_HasSME2
+let Predicates = [HasSVEorSME] in {
// Aliases for existing SVE instructions for which predicate-as-counter are
// accepted as an operand to the instruction
@@ -4025,7 +4027,7 @@ def : InstAlias<"mov $Pd, $Pn",
def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>;
-} // End HasSVE2p1_or_HasSME2
+}
//===----------------------------------------------------------------------===//
// Non-widening BFloat16 to BFloat16 instructions
@@ -4095,7 +4097,7 @@ defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
-defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>;
+defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>;
defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b5b8b6829178..13b5e578391d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
return &II;
}
+// Simplify operations where predicate has all inactive lanes or try to replace
+// with _u form when all lanes are active
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
+ Intrinsic::ID IID) {
+ if (match(II.getOperand(0), m_ZeroInt())) {
+ // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
+ // inactive for sv[func]_m
+ return IC.replaceInstUsesWith(II, II.getOperand(1));
+ }
+ return instCombineSVEAllActive(II, IID);
+}
+
static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
return II_U;
if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
Intrinsic::aarch64_sve_mla>(
@@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
static std::optional<Instruction *>
instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
return II_U;
if (auto FMLA =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
static std::optional<Instruction *>
instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
return II_U;
if (auto FMLS =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
return II_U;
if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
Intrinsic::aarch64_sve_mls>(
@@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
auto *OpMultiplicand = II.getOperand(1);
auto *OpMultiplier = II.getOperand(2);
- // Canonicalise a non _u intrinsic only.
- if (II.getIntrinsicID() != IID)
- if (auto II_U = instCombineSVEAllActive(II, IID))
- return II_U;
-
// Return true if a given instruction is a unit splat value, false otherwise.
auto IsUnitSplat = [](auto *I) {
auto *SplatValue = getSplatValue(I);
@@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_ptest_last:
return instCombineSVEPTest(IC, II);
case Intrinsic::aarch64_sve_fabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
case Intrinsic::aarch64_sve_fadd:
return instCombineSVEVectorFAdd(IC, II);
case Intrinsic::aarch64_sve_fadd_u:
return instCombineSVEVectorFAddU(IC, II);
case Intrinsic::aarch64_sve_fdiv:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
case Intrinsic::aarch64_sve_fmax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
case Intrinsic::aarch64_sve_fmaxnm:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
case Intrinsic::aarch64_sve_fmin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
case Intrinsic::aarch64_sve_fminnm:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
case Intrinsic::aarch64_sve_fmla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
case Intrinsic::aarch64_sve_fmls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
case Intrinsic::aarch64_sve_fmul:
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u))
+ return II_U;
+ return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
case Intrinsic::aarch64_sve_fmul_u:
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
case Intrinsic::aarch64_sve_fmulx:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
case Intrinsic::aarch64_sve_fnmla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
case Intrinsic::aarch64_sve_fnmls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
case Intrinsic::aarch64_sve_fsub:
return instCombineSVEVectorFSub(IC, II);
case Intrinsic::aarch64_sve_fsub_u:
@@ -1930,20 +1946,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
Intrinsic::aarch64_sve_mla_u>(
IC, II, true);
case Intrinsic::aarch64_sve_mla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
case Intrinsic::aarch64_sve_mls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
case Intrinsic::aarch64_sve_mul:
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u))
+ return II_U;
+ return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
case Intrinsic::aarch64_sve_mul_u:
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
case Intrinsic::aarch64_sve_sabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
case Intrinsic::aarch64_sve_smax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
case Intrinsic::aarch64_sve_smin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
case Intrinsic::aarch64_sve_smulh:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
case Intrinsic::aarch64_sve_sub:
return instCombineSVEVectorSub(IC, II);
case Intrinsic::aarch64_sve_sub_u:
@@ -1951,31 +1971,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
Intrinsic::aarch64_sve_mls_u>(
IC, II, true);
case Intrinsic::aarch64_sve_uabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
case Intrinsic::aarch64_sve_umax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
case Intrinsic::aarch64_sve_umin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
case Intrinsic::aarch64_sve_umulh:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u);
case Intrinsic::aarch64_sve_asr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u);
case Intrinsic::aarch64_sve_lsl:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u);
case Intrinsic::aarch64_sve_lsr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u);
case Intrinsic::aarch64_sve_and:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u);
case Intrinsic::aarch64_sve_bic:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u);
case Intrinsic::aarch64_sve_eor:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u);
case Intrinsic::aarch64_sve_orr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u);
case Intrinsic::aarch64_sve_sqsub:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u);
case Intrinsic::aarch64_sve_uqsub:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
case Intrinsic::aarch64_sve_uunpkhi:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b657a0954d78..302116447efc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1166,7 +1166,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FMAD).lower();
// Access to floating-point environment.
- getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
+ getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
+ G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
.libcall();
getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 496ab18e9b19..6e074b6a63c4 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -120,7 +120,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
assert((!Target.getSymA() ||
Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None ||
- Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT) &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT ||
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) &&
"Should only be expression-level modifiers here");
assert((!Target.getSymB() ||
@@ -206,7 +207,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
case FK_Data_2:
return R_CLS(ABS16);
case FK_Data_4:
- return R_CLS(ABS32);
+ return (!IsILP32 &&
+ Target.getAccessVariant() == MCSymbolRefExpr::VK_GOTPCREL)
+ ? ELF::R_AARCH64_GOTPCREL32
+ : R_CLS(ABS32);
case FK_Data_8:
if (IsILP32) {
Ctx.reportError(Fixup.getLoc(),
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 70f3c2c99f0f..44d9a8ac7cb6 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
}
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
- : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+ : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>,
Sched<[]> {
bits<5> Zm;