aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2023-12-18 20:30:12 +0000
committerDimitry Andric <dim@FreeBSD.org>2024-04-06 20:11:55 +0000
commit5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch)
tree1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
parent3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff)
parent312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff)
downloadsrc-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.tar.gz
src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.zip
Merge llvm-project main llvmorg-18-init-15088-gd14ee76181fb
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvm-project main llvmorg-18-init-15088-gd14ee76181fb. PR: 276104 MFC after: 1 month
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td')
-rw-r--r--contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td298
1 files changed, 229 insertions, 69 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
index 8f50af4b71fd..c600bcaab2b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
@@ -127,6 +127,12 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",
def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",
"Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>;
+def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true",
+ "Enable FPMR Register (FEAT_FPMR)">;
+
+def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",
+ "Enable FP8 instructions (FEAT_FP8)">;
+
// This flag is currently still labeled as Experimental, but when fully
// implemented this should tell the compiler to use the zeroing pseudos to
// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive
@@ -148,6 +154,9 @@ def FeatureExperimentalZeroingPseudos
def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",
"UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;
+def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",
+ "true", "Enable BFloat16 Extension (FEAT_BF16)" >;
+
def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",
"NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;
@@ -172,7 +181,7 @@ def FeatureSVE2p1: SubtargetFeature<"sve2p1", "HasSVE2p1", "true",
"Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>;
def FeatureB16B16 : SubtargetFeature<"b16b16", "HasB16B16", "true",
- "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", []>;
+ "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>;
def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",
"Has zero-cycle register moves">;
@@ -223,14 +232,9 @@ def FeatureEnableSelectOptimize : SubtargetFeature<
"enable-select-opt", "EnableSelectOptimize", "true",
"Enable the select optimize pass for select loop heuristics">;
-def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
- "HasCustomCheapAsMoveHandling", "true",
- "Use custom handling of cheap instructions">;
-
def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",
"HasExynosCheapAsMoveHandling", "true",
- "Use Exynos specific handling of cheap instructions",
- [FeatureCustomCheapAsMoveHandling]>;
+ "Use Exynos specific handling of cheap instructions">;
def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
"UsePostRAScheduler", "true", "Schedule again after register allocation">;
@@ -300,6 +304,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
+def FeatureStorePairSuppress : SubtargetFeature<
+ "store-pair-suppress", "EnableStorePairSuppress", "true",
+ "Enable Store Pair Suppression heuristics">;
+
def FeatureForce32BitJumpTables
: SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
"Force jump table entries to be 32-bits wide except at MinSize">;
@@ -382,9 +390,13 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",
"equivalent when the immediate does "
"not fit in the encoding.">;
-def FeatureLSLFast : SubtargetFeature<
- "lsl-fast", "HasLSLFast", "true",
- "CPU has a fastpath logical shift of up to 3 places">;
+def FeatureAddrLSLFast : SubtargetFeature<
+ "addr-lsl-fast", "HasAddrLSLFast", "true",
+ "Address operands with logical shift of up to 3 places are cheap">;
+
+def FeatureALULSLFast : SubtargetFeature<
+ "alu-lsl-fast", "HasALULSLFast", "true",
+ "Add/Sub operations with lsl shift <= 4 are cheap">;
def FeatureAggressiveFMA :
SubtargetFeature<"aggressive-fma",
@@ -438,9 +450,6 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",
"true", "Use an instruction sequence for taking the address of a global "
"that allows a memory tag in the upper address bits">;
-def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",
- "true", "Enable BFloat16 Extension (FEAT_BF16)" >;
-
def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",
"true", "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)">;
@@ -499,12 +508,47 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true",
def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true",
"Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>;
+def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true",
+ "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>;
+
def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true",
"Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>;
def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",
"Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>;
+def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true",
+ "Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">;
+
+def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true",
+ "Enable fp8 multiply-add instructions (FEAT_FP8FMA)">;
+
+def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true",
+ "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>;
+
+def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true",
+ "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)">;
+
+def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true",
+ "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>;
+
+def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true",
+ "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">;
+
+def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true",
+ "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>;
+def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",
+ "Enable Lookup Table instructions (FEAT_LUT)">;
+
+def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true",
+ "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">;
+
+def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true",
+ "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>;
+
+def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true",
+ "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>;
+
def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",
"Apple A7 (the CPU formerly known as Cyclone)">;
@@ -566,6 +610,18 @@ def FeatureD128 : SubtargetFeature<"d128", "HasD128",
"and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)",
[FeatureLSE128]>;
+def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp",
+ "true", "Do not emit ldp">;
+
+def FeatureDisableStp : SubtargetFeature<"disable-stp", "HasDisableStp",
+ "true", "Do not emit stp">;
+
+def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedOnly",
+ "true", "In order to emit ldp, first check if the load will be aligned to 2 * element_size">;
+
+def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",
+ "true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -634,6 +690,10 @@ def HasV9_4aOps : SubtargetFeature<
"v9.4a", "HasV9_4aOps", "true", "Support ARM v9.4a instructions",
[HasV8_9aOps, HasV9_3aOps]>;
+def HasV9_5aOps : SubtargetFeature<
+ "v9.5a", "HasV9_5aOps", "true", "Support ARM v9.5a instructions",
+ [HasV9_4aOps]>;
+
def HasV8_0rOps : SubtargetFeature<
"v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",
[//v8.1
@@ -723,8 +783,8 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in
def SVE2p1Unsupported : AArch64Unsupported;
def SVE2Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSVE2, HasSVE2orSME,
- HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
+ let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16,
+ HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],
SVE2p1Unsupported.F);
}
@@ -737,12 +797,13 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in
def SME2p1Unsupported : AArch64Unsupported;
def SME2Unsupported : AArch64Unsupported {
- let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2],
+ let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,
+ HasSMEF8F16, HasSMEF8F32],
SME2p1Unsupported.F);
}
def SMEUnsupported : AArch64Unsupported {
- let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64],
+ let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64, HasSMEFA64],
SME2Unsupported.F);
}
@@ -778,7 +839,6 @@ def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
FeatureFuseAES,
FeatureFuseAdrpAdd,
FeatureBalanceFPOps,
- FeatureCustomCheapAsMoveHandling,
FeaturePostRAScheduler]>;
def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
@@ -795,11 +855,16 @@ def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",
FeaturePostRAScheduler
]>;
+def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520",
+ "Cortex-A520 ARM processors", [
+ FeatureFuseAES,
+ FeatureFuseAdrpAdd,
+ FeaturePostRAScheduler]>;
+
def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
"Cortex-A57 ARM processors", [
FeatureFuseAES,
FeatureBalanceFPOps,
- FeatureCustomCheapAsMoveHandling,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
FeaturePostRAScheduler,
@@ -841,7 +906,8 @@ def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",
"Cortex-A76 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -850,7 +916,8 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -859,7 +926,8 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -870,7 +938,8 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -880,7 +949,8 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -890,7 +960,19 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",
FeatureFuseAES,
FeaturePostRAScheduler,
FeatureCmpBccFusion,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
+
+def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",
+ "Cortex-A720 ARM processors", [
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureCmpBccFusion,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -905,7 +987,8 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -915,14 +998,26 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",
FeatureCmpBccFusion,
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",
"Cortex-X3 ARM processors", [
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureFuseAES,
+ FeaturePostRAScheduler,
+ FeatureEnableSelectOptimize,
+ FeaturePredictableSelectIsExpensive]>;
+
+def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",
+ "Cortex-X4 ARM processors", [
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureFuseAdrpAdd,
FeatureFuseAES,
FeaturePostRAScheduler,
@@ -934,8 +1029,8 @@ def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",
FeaturePostRAScheduler,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
- FeaturePredictableSelectIsExpensive
- ]>;
+ FeatureStorePairSuppress,
+ FeaturePredictableSelectIsExpensive]>;
def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",
"Nvidia Carmel processors">;
@@ -949,10 +1044,10 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",
FeatureArithmeticCbzFusion,
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES, FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCZeroing,
- FeatureZCZeroingFPWorkaround]
- >;
+ FeatureZCZeroingFPWorkaround]>;
def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
"Apple A10", [
@@ -962,9 +1057,9 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES,
FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing]
- >;
+ FeatureZCZeroing]>;
def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
"Apple A11", [
@@ -974,9 +1069,9 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES,
FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing]
- >;
+ FeatureZCZeroing]>;
def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
"Apple A12", [
@@ -986,9 +1081,9 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES,
FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing]
- >;
+ FeatureZCZeroing]>;
def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
"Apple A13", [
@@ -998,9 +1093,9 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",
FeatureDisableLatencySchedHeuristic,
FeatureFuseAES,
FeatureFuseCryptoEOR,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing]
- >;
+ FeatureZCZeroing]>;
def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
"Apple A14", [
@@ -1016,6 +1111,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",
FeatureFuseCryptoEOR,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
FeatureZCZeroing]>;
@@ -1031,9 +1127,9 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing
- ]>;
+ FeatureZCZeroing]>;
def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
"Apple A16", [
@@ -1047,9 +1143,25 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",
FeatureFuseCCSelect,
FeatureFuseCryptoEOR,
FeatureFuseLiterals,
+ FeatureStorePairSuppress,
+ FeatureZCRegMove,
+ FeatureZCZeroing]>;
+
+def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",
+ "Apple A17", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFuseAddress,
+ FeatureFuseAES,
+ FeatureFuseArithmeticLogic,
+ FeatureFuseCCSelect,
+ FeatureFuseCryptoEOR,
+ FeatureFuseLiterals,
+ FeatureStorePairSuppress,
FeatureZCRegMove,
- FeatureZCZeroing
- ]>;
+ FeatureZCZeroing]>;
def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
"Samsung Exynos-M3 processors",
@@ -1060,7 +1172,9 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
FeatureFuseCCSelect,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
- FeatureLSLFast,
+ FeatureStorePairSuppress,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive]>;
@@ -1077,28 +1191,30 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",
FeatureFuseCCSelect,
FeatureFuseAdrpAdd,
FeatureFuseLiterals,
- FeatureLSLFast,
+ FeatureStorePairSuppress,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureZCZeroing]>;
def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
"Qualcomm Kryo processors", [
- FeatureCustomCheapAsMoveHandling,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast]
- >;
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureStorePairSuppress]>;
def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
"Qualcomm Falkor processors", [
- FeatureCustomCheapAsMoveHandling,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast,
- FeatureSlowSTRQro
- ]>;
+ FeatureStorePairSuppress,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
+ FeatureSlowSTRQro]>;
def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1",
"Neoverse E1 ARM processors", [
@@ -1110,7 +1226,8 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1
"Neoverse N1 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -1119,7 +1236,8 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2
"Neoverse N2 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -1128,7 +1246,8 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne
"Neoverse 512-TVB ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
@@ -1137,7 +1256,8 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
"Neoverse V1 ARM processors", [
FeatureFuseAES,
FeatureFuseAdrpAdd,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive,
@@ -1146,24 +1266,28 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1
def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",
"Neoverse V2 ARM processors", [
FeatureFuseAES,
- FeatureLSLFast,
+ FeatureFuseAdrpAdd,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeaturePostRAScheduler,
FeatureEnableSelectOptimize,
FeaturePredictableSelectIsExpensive]>;
def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",
"Qualcomm Saphira processors", [
- FeatureCustomCheapAsMoveHandling,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureZCZeroing,
- FeatureLSLFast]>;
+ FeatureStorePairSuppress,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast]>;
def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99",
"Cavium ThunderX2 processors", [
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
@@ -1174,59 +1298,72 @@ def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
FeatureBalanceFPOps,
+ FeatureStorePairSuppress,
FeatureStrictAlign]>;
def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",
"Cavium ThunderX processors", [
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",
"ThunderXT88",
"Cavium ThunderX processors", [
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",
"ThunderXT81",
"Cavium ThunderX processors", [
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",
"ThunderXT83",
"Cavium ThunderX processors", [
FeaturePostRAScheduler,
+ FeatureStorePairSuppress,
FeaturePredictableSelectIsExpensive]>;
def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",
"HiSilicon TS-V110 processors", [
- FeatureCustomCheapAsMoveHandling,
FeatureFuseAES,
+ FeatureStorePairSuppress,
FeaturePostRAScheduler]>;
def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",
"Ampere Computing Ampere-1 processors", [
FeaturePostRAScheduler,
FeatureFuseAES,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
FeatureCmpBccFusion,
FeatureFuseAddress,
- FeatureFuseLiterals]>;
+ FeatureFuseLiterals,
+ FeatureStorePairSuppress,
+ FeatureLdpAlignedOnly,
+ FeatureStpAlignedOnly]>;
def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A",
"Ampere Computing Ampere-1A processors", [
FeaturePostRAScheduler,
FeatureFuseAES,
- FeatureLSLFast,
+ FeatureAddrLSLFast,
+ FeatureALULSLFast,
FeatureAggressiveFMA,
FeatureArithmeticBccFusion,
FeatureCmpBccFusion,
FeatureFuseAddress,
FeatureFuseLiterals,
- FeatureFuseLiterals]>;
+ FeatureFuseLiterals,
+ FeatureStorePairSuppress,
+ FeatureLdpAlignedOnly,
+ FeatureStpAlignedOnly]>;
def ProcessorFeatures {
list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
@@ -1238,6 +1375,9 @@ def ProcessorFeatures {
FeatureMatMulInt8, FeatureBF16, FeatureAM,
FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
FeatureFP16FML];
+ list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM,
+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,
+ FeatureFP16FML];
list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
FeatureNEON, FeatureFullFP16, FeatureDotProd,
FeatureRCPC, FeatureSSBS, FeatureRAS,
@@ -1264,6 +1404,9 @@ def ProcessorFeatures {
FeatureFP16FML, FeatureSVE, FeatureTRBE,
FeatureSVE2BitPerm, FeatureBF16, FeatureETE,
FeaturePerfMon, FeatureMatMulInt8, FeatureSPE];
+ list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,
+ FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,
+ FeaturePerfMon, FeatureSPE, FeatureSPE_EEF];
list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,
FeatureFP16FML, FeatureSSBS, FeaturePredRes,
FeatureSB];
@@ -1285,6 +1428,10 @@ def ProcessorFeatures {
FeatureSPE, FeatureBF16, FeatureMatMulInt8,
FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16,
FeatureFP16FML];
+ list<SubtargetFeature> X4 = [HasV9_2aOps,
+ FeaturePerfMon, FeatureETE, FeatureTRBE,
+ FeatureSPE, FeatureMTE, FeatureSVE2BitPerm,
+ FeatureFP16FML, FeatureSPE_EEF];
list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON,
FeatureSHA2, FeaturePerfMon, FeatureFullFP16,
FeatureSVE, FeatureComplxNum];
@@ -1315,6 +1462,10 @@ def ProcessorFeatures {
FeatureNEON, FeaturePerfMon, FeatureSHA3,
FeatureFullFP16, FeatureFP16FML,
FeatureHCX];
+ list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,
+ FeatureNEON, FeaturePerfMon, FeatureSHA3,
+ FeatureFullFP16, FeatureFP16FML,
+ FeatureHCX];
list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,
FeaturePerfMon];
list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,
@@ -1329,9 +1480,9 @@ def ProcessorFeatures {
FeatureFPARMv8, FeatureFullFP16, FeatureNEON,
FeatureRCPC, FeatureSPE, FeatureSSBS,
FeaturePerfMon];
- list<SubtargetFeature> NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE,
+ list<SubtargetFeature> NeoverseN2 = [HasV9_0aOps, FeatureBF16, FeatureETE,
FeatureMatMulInt8, FeatureMTE, FeatureSVE2,
- FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto,
+ FeatureSVE2BitPerm, FeatureTRBE,
FeaturePerfMon];
list<SubtargetFeature> Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,
FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,
@@ -1376,7 +1527,7 @@ def ProcessorFeatures {
// FeatureFuseAdrpAdd is enabled under Generic to allow linker merging
// optimizations.
-def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic,
+def : ProcessorModel<"generic", CortexA510Model, ProcessorFeatures.Generic,
[FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler,
FeatureEnableSelectOptimize]>;
def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53,
@@ -1389,6 +1540,8 @@ def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,
[TuneA55]>;
def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510,
[TuneA510]>;
+def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520,
+ [TuneA520]>;
def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,
[TuneA57]>;
def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65,
@@ -1415,6 +1568,8 @@ def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710,
[TuneA710]>;
def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715,
[TuneA715]>;
+def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720,
+ [TuneA720]>;
def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,
[TuneR82]>;
def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,
@@ -1425,6 +1580,8 @@ def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2,
[TuneX2]>;
def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3,
[TuneX3]>;
+def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4,
+ [TuneX4]>;
def : ProcessorModel<"neoverse-e1", CortexA53Model,
ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;
def : ProcessorModel<"neoverse-n1", NeoverseN1Model,
@@ -1492,12 +1649,15 @@ def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15,
[TuneAppleA15]>;
def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,
[TuneAppleA16]>;
-
+def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17,
+ [TuneAppleA17]>;
// Mac CPUs
def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,
[TuneAppleA14]>;
def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15,
[TuneAppleA15]>;
+def : ProcessorModel<"apple-m3", CycloneModel, ProcessorFeatures.AppleA16,
+ [TuneAppleA16]>;
// watch CPUs.
def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,