src - FreeBSD source tree

diff options


context:
space:
mode:

author	Dimitry Andric <dim@FreeBSD.org>	2023-12-18 20:30:12 +0000
committer	Dimitry Andric <dim@FreeBSD.org>	2024-04-06 20:11:55 +0000
commit	5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch)
tree	1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
parent	3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff)
parent	312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff)
download	src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.tar.gz src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.zip

Merge llvm-project main llvmorg-18-init-15088-gd14ee76181fb

This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and openmp to llvm-project main llvmorg-18-init-15088-gd14ee76181fb.

PR: 276104
MFC after: 1 month

Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td')

-rw-r--r--

contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td

298

1 file changed, 229 insertions, 69 deletions

diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
index 8f50af4b71fd..c600bcaab2b3 100644
--- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td

@@ -127,6 +127,12 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP",

def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true",

"Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>;

+def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true",

+ "Enable FPMR Register (FEAT_FPMR)">;

+def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true",

+ "Enable FP8 instructions (FEAT_FP8)">;

// This flag is currently still labeled as Experimental, but when fully

// implemented this should tell the compiler to use the zeroing pseudos to

// benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive

@@ -148,6 +154,9 @@ def FeatureExperimentalZeroingPseudos

def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl",

"UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">;

+def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",

+ "true", "Enable BFloat16 Extension (FEAT_BF16)" >;

def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r",

"NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">;

@@ -172,7 +181,7 @@ def FeatureSVE2p1: SubtargetFeature<"sve2p1", "HasSVE2p1", "true",

"Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>;

def FeatureB16B16 : SubtargetFeature<"b16b16", "HasB16B16", "true",

- "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", []>;

+ "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>;

def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true",

"Has zero-cycle register moves">;

@@ -223,14 +232,9 @@ def FeatureEnableSelectOptimize : SubtargetFeature<

"enable-select-opt", "EnableSelectOptimize", "true",

"Enable the select optimize pass for select loop heuristics">;

-def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",

- "HasCustomCheapAsMoveHandling", "true",

- "Use custom handling of cheap instructions">;

def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move",

"HasExynosCheapAsMoveHandling", "true",

- "Use Exynos specific handling of cheap instructions",

- [FeatureCustomCheapAsMoveHandling]>;

+ "Use Exynos specific handling of cheap instructions">;

def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",

"UsePostRAScheduler", "true", "Schedule again after register allocation">;

@@ -300,6 +304,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<

"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",

"Disable latency scheduling heuristic">;

+def FeatureStorePairSuppress : SubtargetFeature<

+ "store-pair-suppress", "EnableStorePairSuppress", "true",

+ "Enable Store Pair Suppression heuristics">;

def FeatureForce32BitJumpTables

: SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",

"Force jump table entries to be 32-bits wide except at MinSize">;

@@ -382,9 +390,13 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates",

"equivalent when the immediate does "

"not fit in the encoding.">;

-def FeatureLSLFast : SubtargetFeature<

- "lsl-fast", "HasLSLFast", "true",

- "CPU has a fastpath logical shift of up to 3 places">;

+def FeatureAddrLSLFast : SubtargetFeature<

+ "addr-lsl-fast", "HasAddrLSLFast", "true",

+ "Address operands with logical shift of up to 3 places are cheap">;

+def FeatureALULSLFast : SubtargetFeature<

+ "alu-lsl-fast", "HasALULSLFast", "true",

+ "Add/Sub operations with lsl shift <= 4 are cheap">;

def FeatureAggressiveFMA :

SubtargetFeature<"aggressive-fma",

@@ -438,9 +450,6 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals",

"true", "Use an instruction sequence for taking the address of a global "

"that allows a memory tag in the upper address bits">;

-def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16",

- "true", "Enable BFloat16 Extension (FEAT_BF16)" >;

def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8",

"true", "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)">;

@@ -499,12 +508,47 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true",

def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true",

"Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>;

+def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true",

+ "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>;

def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true",

"Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>;

def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true",

"Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>;

+def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true",

+ "Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">;

+def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true",

+ "Enable fp8 multiply-add instructions (FEAT_FP8FMA)">;

+def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true",

+ "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>;

+def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true",

+ "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)">;

+def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true",

+ "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>;

+def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true",

+ "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">;

+def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true",

+ "Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>;

+def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true",

+ "Enable Lookup Table instructions (FEAT_LUT)">;

+def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true",

+ "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">;

+def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true",

+ "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>;

+def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true",

+ "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>;

def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true",

"Apple A7 (the CPU formerly known as Cyclone)">;

@@ -566,6 +610,18 @@ def FeatureD128 : SubtargetFeature<"d128", "HasD128",

"and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)",

[FeatureLSE128]>;

+def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp",

+ "true", "Do not emit ldp">;

+def FeatureDisableStp : SubtargetFeature<"disable-stp", "HasDisableStp",

+ "true", "Do not emit stp">;

+def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedOnly",

+ "true", "In order to emit ldp, first check if the load will be aligned to 2 * element_size">;

+def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly",

+ "true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">;

//===----------------------------------------------------------------------===//

// Architectures.

@@ -634,6 +690,10 @@ def HasV9_4aOps : SubtargetFeature<

"v9.4a", "HasV9_4aOps", "true", "Support ARM v9.4a instructions",

[HasV8_9aOps, HasV9_3aOps]>;

+def HasV9_5aOps : SubtargetFeature<

+ "v9.5a", "HasV9_5aOps", "true", "Support ARM v9.5a instructions",

+ [HasV9_4aOps]>;

def HasV8_0rOps : SubtargetFeature<

"v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions",

[//v8.1

@@ -723,8 +783,8 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in

def SVE2p1Unsupported : AArch64Unsupported;

def SVE2Unsupported : AArch64Unsupported {

- let F = !listconcat([HasSVE2, HasSVE2orSME,

- HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],

+ let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16,

+ HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm],

SVE2p1Unsupported.F);

}

@@ -737,12 +797,13 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in

def SME2p1Unsupported : AArch64Unsupported;

def SME2Unsupported : AArch64Unsupported {

- let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2],

+ let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA,

+ HasSMEF8F16, HasSMEF8F32],

SME2p1Unsupported.F);

}

def SMEUnsupported : AArch64Unsupported {

- let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64],

+ let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64, HasSMEFA64],

SME2Unsupported.F);

}

@@ -778,7 +839,6 @@ def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",

FeatureFuseAES,

FeatureFuseAdrpAdd,

FeatureBalanceFPOps,

- FeatureCustomCheapAsMoveHandling,

FeaturePostRAScheduler]>;

def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",

@@ -795,11 +855,16 @@ def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510",

FeaturePostRAScheduler

]>;

+def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520",

+ "Cortex-A520 ARM processors", [

+ FeatureFuseAES,

+ FeatureFuseAdrpAdd,

+ FeaturePostRAScheduler]>;

def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",

"Cortex-A57 ARM processors", [

FeatureFuseAES,

FeatureBalanceFPOps,

- FeatureCustomCheapAsMoveHandling,

FeatureFuseAdrpAdd,

FeatureFuseLiterals,

FeaturePostRAScheduler,

@@ -841,7 +906,8 @@ def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76",

"Cortex-A76 ARM processors", [

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -850,7 +916,8 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77",

FeatureCmpBccFusion,

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -859,7 +926,8 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78",

FeatureCmpBccFusion,

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -870,7 +938,8 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily",

FeatureCmpBccFusion,

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -880,7 +949,8 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710",

FeatureCmpBccFusion,

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -890,7 +960,19 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715",

FeatureFuseAES,

FeaturePostRAScheduler,

FeatureCmpBccFusion,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

+ FeatureFuseAdrpAdd,

+ FeatureEnableSelectOptimize,

+ FeaturePredictableSelectIsExpensive]>;

+def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720",

+ "Cortex-A720 ARM processors", [

+ FeatureFuseAES,

+ FeaturePostRAScheduler,

+ FeatureCmpBccFusion,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeatureFuseAdrpAdd,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -905,7 +987,8 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1",

FeatureCmpBccFusion,

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -915,14 +998,26 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2",

FeatureCmpBccFusion,

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3",

"Cortex-X3 ARM processors", [

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

+ FeatureFuseAdrpAdd,

+ FeatureFuseAES,

+ FeaturePostRAScheduler,

+ FeatureEnableSelectOptimize,

+ FeaturePredictableSelectIsExpensive]>;

+def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4",

+ "Cortex-X4 ARM processors", [

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeatureFuseAdrpAdd,

FeatureFuseAES,

FeaturePostRAScheduler,

@@ -934,8 +1029,8 @@ def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX",

FeaturePostRAScheduler,

FeatureAggressiveFMA,

FeatureArithmeticBccFusion,

- FeaturePredictableSelectIsExpensive

- ]>;

+ FeatureStorePairSuppress,

+ FeaturePredictableSelectIsExpensive]>;

def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel",

"Nvidia Carmel processors">;

@@ -949,10 +1044,10 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7",

FeatureArithmeticCbzFusion,

FeatureDisableLatencySchedHeuristic,

FeatureFuseAES, FeatureFuseCryptoEOR,

+ FeatureStorePairSuppress,

FeatureZCRegMove,

FeatureZCZeroing,

- FeatureZCZeroingFPWorkaround]

- >;

+ FeatureZCZeroingFPWorkaround]>;

def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",

"Apple A10", [

@@ -962,9 +1057,9 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10",

FeatureDisableLatencySchedHeuristic,

FeatureFuseAES,

FeatureFuseCryptoEOR,

+ FeatureStorePairSuppress,

FeatureZCRegMove,

- FeatureZCZeroing]

- >;

+ FeatureZCZeroing]>;

def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",

"Apple A11", [

@@ -974,9 +1069,9 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11",

FeatureDisableLatencySchedHeuristic,

FeatureFuseAES,

FeatureFuseCryptoEOR,

+ FeatureStorePairSuppress,

FeatureZCRegMove,

- FeatureZCZeroing]

- >;

+ FeatureZCZeroing]>;

def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",

"Apple A12", [

@@ -986,9 +1081,9 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12",

FeatureDisableLatencySchedHeuristic,

FeatureFuseAES,

FeatureFuseCryptoEOR,

+ FeatureStorePairSuppress,

FeatureZCRegMove,

- FeatureZCZeroing]

- >;

+ FeatureZCZeroing]>;

def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",

"Apple A13", [

@@ -998,9 +1093,9 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13",

FeatureDisableLatencySchedHeuristic,

FeatureFuseAES,

FeatureFuseCryptoEOR,

+ FeatureStorePairSuppress,

FeatureZCRegMove,

- FeatureZCZeroing]

- >;

+ FeatureZCZeroing]>;

def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",

"Apple A14", [

@@ -1016,6 +1111,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14",

FeatureFuseCryptoEOR,

FeatureFuseAdrpAdd,

FeatureFuseLiterals,

+ FeatureStorePairSuppress,

FeatureZCRegMove,

FeatureZCZeroing]>;

@@ -1031,9 +1127,9 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15",

FeatureFuseCCSelect,

FeatureFuseCryptoEOR,

FeatureFuseLiterals,

+ FeatureStorePairSuppress,

FeatureZCRegMove,

- FeatureZCZeroing

- ]>;

+ FeatureZCZeroing]>;

def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",

"Apple A16", [

@@ -1047,9 +1143,25 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16",

FeatureFuseCCSelect,

FeatureFuseCryptoEOR,

FeatureFuseLiterals,

+ FeatureStorePairSuppress,

+ FeatureZCRegMove,

+ FeatureZCZeroing]>;

+def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17",

+ "Apple A17", [

+ FeatureAlternateSExtLoadCVTF32Pattern,

+ FeatureArithmeticBccFusion,

+ FeatureArithmeticCbzFusion,

+ FeatureDisableLatencySchedHeuristic,

+ FeatureFuseAddress,

+ FeatureFuseAES,

+ FeatureFuseArithmeticLogic,

+ FeatureFuseCCSelect,

+ FeatureFuseCryptoEOR,

+ FeatureFuseLiterals,

+ FeatureStorePairSuppress,

FeatureZCRegMove,

- FeatureZCZeroing

- ]>;

+ FeatureZCZeroing]>;

def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",

"Samsung Exynos-M3 processors",

@@ -1060,7 +1172,9 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",

FeatureFuseCCSelect,

FeatureFuseAdrpAdd,

FeatureFuseLiterals,

- FeatureLSLFast,

+ FeatureStorePairSuppress,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeaturePredictableSelectIsExpensive]>;

@@ -1077,28 +1191,30 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3",

FeatureFuseCCSelect,

FeatureFuseAdrpAdd,

FeatureFuseLiterals,

- FeatureLSLFast,

+ FeatureStorePairSuppress,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureZCZeroing]>;

def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",

"Qualcomm Kryo processors", [

- FeatureCustomCheapAsMoveHandling,

FeaturePostRAScheduler,

FeaturePredictableSelectIsExpensive,

FeatureZCZeroing,

- FeatureLSLFast]

- >;

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

+ FeatureStorePairSuppress]>;

def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",

"Qualcomm Falkor processors", [

- FeatureCustomCheapAsMoveHandling,

FeaturePostRAScheduler,

FeaturePredictableSelectIsExpensive,

FeatureZCZeroing,

- FeatureLSLFast,

- FeatureSlowSTRQro

- ]>;

+ FeatureStorePairSuppress,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

+ FeatureSlowSTRQro]>;

def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1",

"Neoverse E1 ARM processors", [

@@ -1110,7 +1226,8 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1

"Neoverse N1 ARM processors", [

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -1119,7 +1236,8 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2

"Neoverse N2 ARM processors", [

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -1128,7 +1246,8 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne

"Neoverse 512-TVB ARM processors", [

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

@@ -1137,7 +1256,8 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1

"Neoverse V1 ARM processors", [

FeatureFuseAES,

FeatureFuseAdrpAdd,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive,

@@ -1146,24 +1266,28 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1

def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2",

"Neoverse V2 ARM processors", [

FeatureFuseAES,

- FeatureLSLFast,

+ FeatureFuseAdrpAdd,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeaturePostRAScheduler,

FeatureEnableSelectOptimize,

FeaturePredictableSelectIsExpensive]>;

def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira",

"Qualcomm Saphira processors", [

- FeatureCustomCheapAsMoveHandling,

FeaturePostRAScheduler,

FeaturePredictableSelectIsExpensive,

FeatureZCZeroing,

- FeatureLSLFast]>;

+ FeatureStorePairSuppress,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast]>;

def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99",

"Cavium ThunderX2 processors", [

FeatureAggressiveFMA,

FeatureArithmeticBccFusion,

FeaturePostRAScheduler,

+ FeatureStorePairSuppress,

FeaturePredictableSelectIsExpensive]>;

def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",

@@ -1174,59 +1298,72 @@ def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily",

FeaturePostRAScheduler,

FeaturePredictableSelectIsExpensive,

FeatureBalanceFPOps,

+ FeatureStorePairSuppress,

FeatureStrictAlign]>;

def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX",

"Cavium ThunderX processors", [

FeaturePostRAScheduler,

+ FeatureStorePairSuppress,

FeaturePredictableSelectIsExpensive]>;

def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily",

"ThunderXT88",

"Cavium ThunderX processors", [

FeaturePostRAScheduler,

+ FeatureStorePairSuppress,

FeaturePredictableSelectIsExpensive]>;

def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily",

"ThunderXT81",

"Cavium ThunderX processors", [

FeaturePostRAScheduler,

+ FeatureStorePairSuppress,

FeaturePredictableSelectIsExpensive]>;

def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily",

"ThunderXT83",

"Cavium ThunderX processors", [

FeaturePostRAScheduler,

+ FeatureStorePairSuppress,

FeaturePredictableSelectIsExpensive]>;

def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110",

"HiSilicon TS-V110 processors", [

- FeatureCustomCheapAsMoveHandling,

FeatureFuseAES,

+ FeatureStorePairSuppress,

FeaturePostRAScheduler]>;

def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1",

"Ampere Computing Ampere-1 processors", [

FeaturePostRAScheduler,

FeatureFuseAES,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeatureAggressiveFMA,

FeatureArithmeticBccFusion,

FeatureCmpBccFusion,

FeatureFuseAddress,

- FeatureFuseLiterals]>;

+ FeatureFuseLiterals,

+ FeatureStorePairSuppress,

+ FeatureLdpAlignedOnly,

+ FeatureStpAlignedOnly]>;

def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A",

"Ampere Computing Ampere-1A processors", [

FeaturePostRAScheduler,

FeatureFuseAES,

- FeatureLSLFast,

+ FeatureAddrLSLFast,

+ FeatureALULSLFast,

FeatureAggressiveFMA,

FeatureArithmeticBccFusion,

FeatureCmpBccFusion,

FeatureFuseAddress,

FeatureFuseLiterals,

- FeatureFuseLiterals]>;

+ FeatureFuseLiterals,

+ FeatureStorePairSuppress,

+ FeatureLdpAlignedOnly,

+ FeatureStpAlignedOnly]>;

def ProcessorFeatures {

list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,

@@ -1238,6 +1375,9 @@ def ProcessorFeatures {

FeatureMatMulInt8, FeatureBF16, FeatureAM,

FeatureMTE, FeatureETE, FeatureSVE2BitPerm,

FeatureFP16FML];

+ list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM,

+ FeatureMTE, FeatureETE, FeatureSVE2BitPerm,

+ FeatureFP16FML];

list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,

FeatureNEON, FeatureFullFP16, FeatureDotProd,

FeatureRCPC, FeatureSSBS, FeatureRAS,

@@ -1264,6 +1404,9 @@ def ProcessorFeatures {

FeatureFP16FML, FeatureSVE, FeatureTRBE,

FeatureSVE2BitPerm, FeatureBF16, FeatureETE,

FeaturePerfMon, FeatureMatMulInt8, FeatureSPE];

+ list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML,

+ FeatureTRBE, FeatureSVE2BitPerm, FeatureETE,

+ FeaturePerfMon, FeatureSPE, FeatureSPE_EEF];

list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,

FeatureFP16FML, FeatureSSBS, FeaturePredRes,

FeatureSB];

@@ -1285,6 +1428,10 @@ def ProcessorFeatures {

FeatureSPE, FeatureBF16, FeatureMatMulInt8,

FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16,

FeatureFP16FML];

+ list<SubtargetFeature> X4 = [HasV9_2aOps,

+ FeaturePerfMon, FeatureETE, FeatureTRBE,

+ FeatureSPE, FeatureMTE, FeatureSVE2BitPerm,

+ FeatureFP16FML, FeatureSPE_EEF];

list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON,

FeatureSHA2, FeaturePerfMon, FeatureFullFP16,

FeatureSVE, FeatureComplxNum];

@@ -1315,6 +1462,10 @@ def ProcessorFeatures {

FeatureNEON, FeaturePerfMon, FeatureSHA3,

FeatureFullFP16, FeatureFP16FML,

FeatureHCX];

+ list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8,

+ FeatureNEON, FeaturePerfMon, FeatureSHA3,

+ FeatureFullFP16, FeatureFP16FML,

+ FeatureHCX];

list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto,

FeaturePerfMon];

list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd,

@@ -1329,9 +1480,9 @@ def ProcessorFeatures {

FeatureFPARMv8, FeatureFullFP16, FeatureNEON,

FeatureRCPC, FeatureSPE, FeatureSSBS,

FeaturePerfMon];

- list<SubtargetFeature> NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE,

+ list<SubtargetFeature> NeoverseN2 = [HasV9_0aOps, FeatureBF16, FeatureETE,

FeatureMatMulInt8, FeatureMTE, FeatureSVE2,

- FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto,

+ FeatureSVE2BitPerm, FeatureTRBE,

FeaturePerfMon];

list<SubtargetFeature> Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist,

FeatureCrypto, FeatureFPARMv8, FeatureFP16FML,

@@ -1376,7 +1527,7 @@ def ProcessorFeatures {

// FeatureFuseAdrpAdd is enabled under Generic to allow linker merging

// optimizations.

-def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic,

+def : ProcessorModel<"generic", CortexA510Model, ProcessorFeatures.Generic,

[FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler,

FeatureEnableSelectOptimize]>;

def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53,

@@ -1389,6 +1540,8 @@ def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55,

[TuneA55]>;

def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510,

[TuneA510]>;

+def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520,

+ [TuneA520]>;

def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53,

[TuneA57]>;

def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65,

@@ -1415,6 +1568,8 @@ def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710,

[TuneA710]>;

def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715,

[TuneA715]>;

+def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720,

+ [TuneA720]>;

def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82,

[TuneR82]>;

def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1,

@@ -1425,6 +1580,8 @@ def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2,

[TuneX2]>;

def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3,

[TuneX3]>;

+def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4,

+ [TuneX4]>;

def : ProcessorModel<"neoverse-e1", CortexA53Model,

ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>;

def : ProcessorModel<"neoverse-n1", NeoverseN1Model,

@@ -1492,12 +1649,15 @@ def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15,

[TuneAppleA15]>;

def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16,

[TuneAppleA16]>;

+def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17,

+ [TuneAppleA17]>;

// Mac CPUs

def : ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14,

[TuneAppleA14]>;

def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15,

[TuneAppleA15]>;

+def : ProcessorModel<"apple-m3", CycloneModel, ProcessorFeatures.AppleA16,

+ [TuneAppleA16]>;

// watch CPUs.

def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12,