diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2023-12-18 20:30:12 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:11:55 +0000 |
commit | 5f757f3ff9144b609b3c433dfd370cc6bdc191ad (patch) | |
tree | 1b4e980b866cd26a00af34c0a653eb640bd09caf /contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td | |
parent | 3e1c8a35f741a5d114d0ba670b15191355711fe9 (diff) | |
parent | 312c0ed19cc5276a17bacf2120097bec4515b0f1 (diff) | |
download | src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.tar.gz src-5f757f3ff9144b609b3c433dfd370cc6bdc191ad.zip |
Merge llvm-project main llvmorg-18-init-15088-gd14ee76181fb
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvm-project main llvmorg-18-init-15088-gd14ee76181fb.
PR: 276104
MFC after: 1 month
Diffstat (limited to 'contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td')
-rw-r--r-- | contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td | 298 |
1 file changed, 229 insertions, 69 deletions
diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td index 8f50af4b71fd..c600bcaab2b3 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td @@ -127,6 +127,12 @@ def FeatureCCPP : SubtargetFeature<"ccpp", "HasCCPP", def FeatureSVE : SubtargetFeature<"sve", "HasSVE", "true", "Enable Scalable Vector Extension (SVE) instructions (FEAT_SVE)", [FeatureFullFP16]>; +def FeatureFPMR : SubtargetFeature<"fpmr", "HasFPMR", "true", + "Enable FPMR Register (FEAT_FPMR)">; + +def FeatureFP8 : SubtargetFeature<"fp8", "HasFP8", "true", + "Enable FP8 instructions (FEAT_FP8)">; + // This flag is currently still labeled as Experimental, but when fully // implemented this should tell the compiler to use the zeroing pseudos to // benefit from the reverse instructions (e.g. SUB vs SUBR) if the inactive @@ -148,6 +154,9 @@ def FeatureExperimentalZeroingPseudos def FeatureUseScalarIncVL : SubtargetFeature<"use-scalar-inc-vl", "UseScalarIncVL", "true", "Prefer inc/dec over add+cnt">; +def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", + "true", "Enable BFloat16 Extension (FEAT_BF16)" >; + def FeatureNoSVEFPLD1R : SubtargetFeature<"no-sve-fp-ld1r", "NoSVEFPLD1R", "true", "Avoid using LD1RX instructions for FP">; @@ -172,7 +181,7 @@ def FeatureSVE2p1: SubtargetFeature<"sve2p1", "HasSVE2p1", "true", "Enable Scalable Vector Extension 2.1 instructions", [FeatureSVE2]>; def FeatureB16B16 : SubtargetFeature<"b16b16", "HasB16B16", "true", - "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", []>; + "Enable SVE2.1 or SME2.1 non-widening BFloat16 to BFloat16 instructions (FEAT_B16B16)", [FeatureBF16]>; def FeatureZCRegMove : SubtargetFeature<"zcm", "HasZeroCycleRegMove", "true", "Has zero-cycle register moves">; @@ -223,14 +232,9 @@ def FeatureEnableSelectOptimize : SubtargetFeature< "enable-select-opt", 
"EnableSelectOptimize", "true", "Enable the select optimize pass for select loop heuristics">; -def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move", - "HasCustomCheapAsMoveHandling", "true", - "Use custom handling of cheap instructions">; - def FeatureExynosCheapAsMoveHandling : SubtargetFeature<"exynos-cheap-as-move", "HasExynosCheapAsMoveHandling", "true", - "Use Exynos specific handling of cheap instructions", - [FeatureCustomCheapAsMoveHandling]>; + "Use Exynos specific handling of cheap instructions">; def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler", "UsePostRAScheduler", "true", "Schedule again after register allocation">; @@ -300,6 +304,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature< "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", "Disable latency scheduling heuristic">; +def FeatureStorePairSuppress : SubtargetFeature< + "store-pair-suppress", "EnableStorePairSuppress", "true", + "Enable Store Pair Suppression heuristics">; + def FeatureForce32BitJumpTables : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true", "Force jump table entries to be 32-bits wide except at MinSize">; @@ -382,9 +390,13 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "equivalent when the immediate does " "not fit in the encoding.">; -def FeatureLSLFast : SubtargetFeature< - "lsl-fast", "HasLSLFast", "true", - "CPU has a fastpath logical shift of up to 3 places">; +def FeatureAddrLSLFast : SubtargetFeature< + "addr-lsl-fast", "HasAddrLSLFast", "true", + "Address operands with logical shift of up to 3 places are cheap">; + +def FeatureALULSLFast : SubtargetFeature< + "alu-lsl-fast", "HasALULSLFast", "true", + "Add/Sub operations with lsl shift <= 4 are cheap">; def FeatureAggressiveFMA : SubtargetFeature<"aggressive-fma", @@ -438,9 +450,6 @@ def FeatureTaggedGlobals : SubtargetFeature<"tagged-globals", "true", "Use an instruction sequence for 
taking the address of a global " "that allows a memory tag in the upper address bits">; -def FeatureBF16 : SubtargetFeature<"bf16", "HasBF16", - "true", "Enable BFloat16 Extension (FEAT_BF16)" >; - def FeatureMatMulInt8 : SubtargetFeature<"i8mm", "HasMatMulInt8", "true", "Enable Matrix Multiply Int8 Extension (FEAT_I8MM)">; @@ -499,12 +508,47 @@ def FeatureSMEI16I64 : SubtargetFeature<"sme-i16i64", "HasSMEI16I64", "true", def FeatureSMEF16F16 : SubtargetFeature<"sme-f16f16", "HasSMEF16F16", "true", "Enable SME2.1 non-widening Float16 instructions (FEAT_SME_F16F16)", []>; +def FeatureSMEFA64 : SubtargetFeature<"sme-fa64", "HasSMEFA64", "true", + "Enable the full A64 instruction set in streaming SVE mode (FEAT_SME_FA64)", [FeatureSME, FeatureSVE2]>; + def FeatureSME2 : SubtargetFeature<"sme2", "HasSME2", "true", "Enable Scalable Matrix Extension 2 (SME2) instructions", [FeatureSME]>; def FeatureSME2p1 : SubtargetFeature<"sme2p1", "HasSME2p1", "true", "Enable Scalable Matrix Extension 2.1 (FEAT_SME2p1) instructions", [FeatureSME2]>; +def FeatureFAMINMAX: SubtargetFeature<"faminmax", "HasFAMINMAX", "true", + "Enable FAMIN and FAMAX instructions (FEAT_FAMINMAX)">; + +def FeatureFP8FMA : SubtargetFeature<"fp8fma", "HasFP8FMA", "true", + "Enable fp8 multiply-add instructions (FEAT_FP8FMA)">; + +def FeatureSSVE_FP8FMA : SubtargetFeature<"ssve-fp8fma", "HasSSVE_FP8FMA", "true", + "Enable SVE2 fp8 multiply-add instructions (FEAT_SSVE_FP8FMA)", [FeatureSME2]>; + +def FeatureFP8DOT2: SubtargetFeature<"fp8dot2", "HasFP8DOT2", "true", + "Enable fp8 2-way dot instructions (FEAT_FP8DOT2)">; + +def FeatureSSVE_FP8DOT2 : SubtargetFeature<"ssve-fp8dot2", "HasSSVE_FP8DOT2", "true", + "Enable SVE2 fp8 2-way dot product instructions (FEAT_SSVE_FP8DOT2)", [FeatureSME2]>; + +def FeatureFP8DOT4: SubtargetFeature<"fp8dot4", "HasFP8DOT4", "true", + "Enable fp8 4-way dot instructions (FEAT_FP8DOT4)">; + +def FeatureSSVE_FP8DOT4 : SubtargetFeature<"ssve-fp8dot4", "HasSSVE_FP8DOT4", "true", + 
"Enable SVE2 fp8 4-way dot product instructions (FEAT_SSVE_FP8DOT4)", [FeatureSME2]>; +def FeatureLUT: SubtargetFeature<"lut", "HasLUT", "true", + "Enable Lookup Table instructions (FEAT_LUT)">; + +def FeatureSME_LUTv2 : SubtargetFeature<"sme-lutv2", "HasSME_LUTv2", "true", + "Enable Scalable Matrix Extension (SME) LUTv2 instructions (FEAT_SME_LUTv2)">; + +def FeatureSMEF8F16 : SubtargetFeature<"sme-f8f16", "HasSMEF8F16", "true", + "Enable Scalable Matrix Extension (SME) F8F16 instructions(FEAT_SME_F8F16)", [FeatureSME2, FeatureFP8]>; + +def FeatureSMEF8F32 : SubtargetFeature<"sme-f8f32", "HasSMEF8F32", "true", + "Enable Scalable Matrix Extension (SME) F8F32 instructions (FEAT_SME_F8F32)", [FeatureSME2, FeatureFP8]>; + def FeatureAppleA7SysReg : SubtargetFeature<"apple-a7-sysreg", "HasAppleA7SysReg", "true", "Apple A7 (the CPU formerly known as Cyclone)">; @@ -566,6 +610,18 @@ def FeatureD128 : SubtargetFeature<"d128", "HasD128", "and Instructions (FEAT_D128, FEAT_LVA3, FEAT_SYSREG128, FEAT_SYSINSTR128)", [FeatureLSE128]>; +def FeatureDisableLdp : SubtargetFeature<"disable-ldp", "HasDisableLdp", + "true", "Do not emit ldp">; + +def FeatureDisableStp : SubtargetFeature<"disable-stp", "HasDisableStp", + "true", "Do not emit stp">; + +def FeatureLdpAlignedOnly : SubtargetFeature<"ldp-aligned-only", "HasLdpAlignedOnly", + "true", "In order to emit ldp, first check if the load will be aligned to 2 * element_size">; + +def FeatureStpAlignedOnly : SubtargetFeature<"stp-aligned-only", "HasStpAlignedOnly", + "true", "In order to emit stp, first check if the store will be aligned to 2 * element_size">; + //===----------------------------------------------------------------------===// // Architectures. 
// @@ -634,6 +690,10 @@ def HasV9_4aOps : SubtargetFeature< "v9.4a", "HasV9_4aOps", "true", "Support ARM v9.4a instructions", [HasV8_9aOps, HasV9_3aOps]>; +def HasV9_5aOps : SubtargetFeature< + "v9.5a", "HasV9_5aOps", "true", "Support ARM v9.5a instructions", + [HasV9_4aOps]>; + def HasV8_0rOps : SubtargetFeature< "v8r", "HasV8_0rOps", "true", "Support ARM v8r instructions", [//v8.1 @@ -723,8 +783,8 @@ let F = [HasSVE2p1, HasSVE2p1_or_HasSME2, HasSVE2p1_or_HasSME2p1] in def SVE2p1Unsupported : AArch64Unsupported; def SVE2Unsupported : AArch64Unsupported { - let F = !listconcat([HasSVE2, HasSVE2orSME, - HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm], + let F = !listconcat([HasSVE2, HasSVE2orSME, HasSSVE_FP8FMA, HasSMEF8F16, + HasSMEF8F32, HasSVE2AES, HasSVE2SHA3, HasSVE2SM4, HasSVE2BitPerm], SVE2p1Unsupported.F); } @@ -737,12 +797,13 @@ let F = [HasSME2p1, HasSVE2p1_or_HasSME2p1] in def SME2p1Unsupported : AArch64Unsupported; def SME2Unsupported : AArch64Unsupported { - let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2], + let F = !listconcat([HasSME2, HasSVE2p1_or_HasSME2, HasSSVE_FP8FMA, + HasSMEF8F16, HasSMEF8F32], SME2p1Unsupported.F); } def SMEUnsupported : AArch64Unsupported { - let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64], + let F = !listconcat([HasSME, HasSMEI16I64, HasSMEF16F16, HasSMEF64F64, HasSMEFA64], SME2Unsupported.F); } @@ -778,7 +839,6 @@ def TuneA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", FeatureFuseAES, FeatureFuseAdrpAdd, FeatureBalanceFPOps, - FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler]>; def TuneA55 : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55", @@ -795,11 +855,16 @@ def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510", FeaturePostRAScheduler ]>; +def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520", + "Cortex-A520 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeaturePostRAScheduler]>; + def TuneA57 : SubtargetFeature<"a57", 
"ARMProcFamily", "CortexA57", "Cortex-A57 ARM processors", [ FeatureFuseAES, FeatureBalanceFPOps, - FeatureCustomCheapAsMoveHandling, FeatureFuseAdrpAdd, FeatureFuseLiterals, FeaturePostRAScheduler, @@ -841,7 +906,8 @@ def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", "Cortex-A76 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -850,7 +916,8 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -859,7 +926,8 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -870,7 +938,8 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -880,7 +949,8 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -890,7 +960,19 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneA720 : SubtargetFeature<"a720", 
"ARMProcFamily", "CortexA720", + "Cortex-A720 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion, + FeatureAddrLSLFast, + FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -905,7 +987,8 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -915,14 +998,26 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", "Cortex-X3 ARM processors", [ - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + +def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4", + "Cortex-X4 ARM processors", [ + FeatureAddrLSLFast, + FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureFuseAES, FeaturePostRAScheduler, @@ -934,8 +1029,8 @@ def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", FeaturePostRAScheduler, FeatureAggressiveFMA, FeatureArithmeticBccFusion, - FeaturePredictableSelectIsExpensive - ]>; + FeatureStorePairSuppress, + FeaturePredictableSelectIsExpensive]>; def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", "Nvidia Carmel processors">; @@ -949,10 +1044,10 @@ def TuneAppleA7 : SubtargetFeature<"apple-a7", "ARMProcFamily", "AppleA7", FeatureArithmeticCbzFusion, FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, 
FeatureZCRegMove, FeatureZCZeroing, - FeatureZCZeroingFPWorkaround] - >; + FeatureZCZeroingFPWorkaround]>; def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", "Apple A10", [ @@ -962,9 +1057,9 @@ def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing] - >; + FeatureZCZeroing]>; def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", "Apple A11", [ @@ -974,9 +1069,9 @@ def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing] - >; + FeatureZCZeroing]>; def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", "Apple A12", [ @@ -986,9 +1081,9 @@ def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing] - >; + FeatureZCZeroing]>; def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", "Apple A13", [ @@ -998,9 +1093,9 @@ def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing] - >; + FeatureZCZeroing]>; def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", "Apple A14", [ @@ -1016,6 +1111,7 @@ def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", FeatureFuseCryptoEOR, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureZCRegMove, FeatureZCZeroing]>; @@ -1031,9 +1127,9 @@ def TuneAppleA15 : SubtargetFeature<"apple-a15", "ARMProcFamily", "AppleA15", FeatureFuseCCSelect, 
FeatureFuseCryptoEOR, FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing - ]>; + FeatureZCZeroing]>; def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16", "Apple A16", [ @@ -1047,9 +1143,25 @@ def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16", FeatureFuseCCSelect, FeatureFuseCryptoEOR, FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureZCRegMove, + FeatureZCZeroing]>; + +def TuneAppleA17 : SubtargetFeature<"apple-a17", "ARMProcFamily", "AppleA17", + "Apple A17", [ + FeatureAlternateSExtLoadCVTF32Pattern, + FeatureArithmeticBccFusion, + FeatureArithmeticCbzFusion, + FeatureDisableLatencySchedHeuristic, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseArithmeticLogic, + FeatureFuseCCSelect, + FeatureFuseCryptoEOR, + FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing - ]>; + FeatureZCZeroing]>; def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", "Samsung Exynos-M3 processors", @@ -1060,7 +1172,9 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", FeatureFuseCCSelect, FeatureFuseAdrpAdd, FeatureFuseLiterals, - FeatureLSLFast, + FeatureStorePairSuppress, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive]>; @@ -1077,28 +1191,30 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", FeatureFuseCCSelect, FeatureFuseAdrpAdd, FeatureFuseLiterals, - FeatureLSLFast, + FeatureStorePairSuppress, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureZCZeroing]>; def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", "Qualcomm Kryo processors", [ - FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, - FeatureLSLFast] - >; + FeatureAddrLSLFast, + FeatureALULSLFast, + FeatureStorePairSuppress]>; def TuneFalkor : 
SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", "Qualcomm Falkor processors", [ - FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, - FeatureLSLFast, - FeatureSlowSTRQro - ]>; + FeatureStorePairSuppress, + FeatureAddrLSLFast, + FeatureALULSLFast, + FeatureSlowSTRQro]>; def TuneNeoverseE1 : SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1", "Neoverse E1 ARM processors", [ @@ -1110,7 +1226,8 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1 "Neoverse N1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -1119,7 +1236,8 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2 "Neoverse N2 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -1128,7 +1246,8 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne "Neoverse 512-TVB ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -1137,7 +1256,8 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1 "Neoverse V1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive, @@ -1146,24 +1266,28 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1 def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2", "Neoverse V2 ARM processors", [ FeatureFuseAES, - 
FeatureLSLFast, + FeatureFuseAdrpAdd, + FeatureAddrLSLFast, + FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", "Qualcomm Saphira processors", [ - FeatureCustomCheapAsMoveHandling, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, - FeatureLSLFast]>; + FeatureStorePairSuppress, + FeatureAddrLSLFast, + FeatureALULSLFast]>; def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", "Cavium ThunderX2 processors", [ FeatureAggressiveFMA, FeatureArithmeticBccFusion, FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", @@ -1174,59 +1298,72 @@ def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureBalanceFPOps, + FeatureStorePairSuppress, FeatureStrictAlign]>; def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", "Cavium ThunderX processors", [ FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", "ThunderXT88", "Cavium ThunderX processors", [ FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", "ThunderXT81", "Cavium ThunderX processors", [ FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", "ThunderXT83", "Cavium ThunderX processors", [ FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", "HiSilicon TS-V110 processors", [ - 
FeatureCustomCheapAsMoveHandling, FeatureFuseAES, + FeatureStorePairSuppress, FeaturePostRAScheduler]>; def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", "Ampere Computing Ampere-1 processors", [ FeaturePostRAScheduler, FeatureFuseAES, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, FeatureCmpBccFusion, FeatureFuseAddress, - FeatureFuseLiterals]>; + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureLdpAlignedOnly, + FeatureStpAlignedOnly]>; def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", "Ampere Computing Ampere-1A processors", [ FeaturePostRAScheduler, FeatureFuseAES, - FeatureLSLFast, + FeatureAddrLSLFast, + FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, FeatureCmpBccFusion, FeatureFuseAddress, FeatureFuseLiterals, - FeatureFuseLiterals]>; + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureLdpAlignedOnly, + FeatureStpAlignedOnly]>; def ProcessorFeatures { list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, @@ -1238,6 +1375,9 @@ def ProcessorFeatures { FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML]; + list<SubtargetFeature> A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; list<SubtargetFeature> A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, FeatureRCPC, FeatureSSBS, FeatureRAS, @@ -1264,6 +1404,9 @@ def ProcessorFeatures { FeatureFP16FML, FeatureSVE, FeatureTRBE, FeatureSVE2BitPerm, FeatureBF16, FeatureETE, FeaturePerfMon, FeatureMatMulInt8, FeatureSPE]; + list<SubtargetFeature> A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, + FeaturePerfMon, FeatureSPE, FeatureSPE_EEF]; list<SubtargetFeature> R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, FeatureFP16FML, FeatureSSBS, 
FeaturePredRes, FeatureSB]; @@ -1285,6 +1428,10 @@ def ProcessorFeatures { FeatureSPE, FeatureBF16, FeatureMatMulInt8, FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16, FeatureFP16FML]; + list<SubtargetFeature> X4 = [HasV9_2aOps, + FeaturePerfMon, FeatureETE, FeatureTRBE, + FeatureSPE, FeatureMTE, FeatureSVE2BitPerm, + FeatureFP16FML, FeatureSPE_EEF]; list<SubtargetFeature> A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON, FeatureSHA2, FeaturePerfMon, FeatureFullFP16, FeatureSVE, FeatureComplxNum]; @@ -1315,6 +1462,10 @@ def ProcessorFeatures { FeatureNEON, FeaturePerfMon, FeatureSHA3, FeatureFullFP16, FeatureFP16FML, FeatureHCX]; + list<SubtargetFeature> AppleA17 = [HasV8_6aOps, FeatureCrypto, FeatureFPARMv8, + FeatureNEON, FeaturePerfMon, FeatureSHA3, + FeatureFullFP16, FeatureFP16FML, + FeatureHCX]; list<SubtargetFeature> ExynosM3 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, FeaturePerfMon]; list<SubtargetFeature> ExynosM4 = [HasV8_2aOps, FeatureCrypto, FeatureDotProd, @@ -1329,9 +1480,9 @@ def ProcessorFeatures { FeatureFPARMv8, FeatureFullFP16, FeatureNEON, FeatureRCPC, FeatureSPE, FeatureSSBS, FeaturePerfMon]; - list<SubtargetFeature> NeoverseN2 = [HasV8_5aOps, FeatureBF16, FeatureETE, + list<SubtargetFeature> NeoverseN2 = [HasV9_0aOps, FeatureBF16, FeatureETE, FeatureMatMulInt8, FeatureMTE, FeatureSVE2, - FeatureSVE2BitPerm, FeatureTRBE, FeatureCrypto, + FeatureSVE2BitPerm, FeatureTRBE, FeaturePerfMon]; list<SubtargetFeature> Neoverse512TVB = [HasV8_4aOps, FeatureBF16, FeatureCacheDeepPersist, FeatureCrypto, FeatureFPARMv8, FeatureFP16FML, @@ -1376,7 +1527,7 @@ def ProcessorFeatures { // FeatureFuseAdrpAdd is enabled under Generic to allow linker merging // optimizations. 
-def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic, +def : ProcessorModel<"generic", CortexA510Model, ProcessorFeatures.Generic, [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler, FeatureEnableSelectOptimize]>; def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53, @@ -1389,6 +1540,8 @@ def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55, [TuneA55]>; def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510, [TuneA510]>; +def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520, + [TuneA520]>; def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53, [TuneA57]>; def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65, @@ -1415,6 +1568,8 @@ def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, [TuneA710]>; def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715, [TuneA715]>; +def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720, + [TuneA720]>; def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, [TuneR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, @@ -1425,6 +1580,8 @@ def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2, [TuneX2]>; def : ProcessorModel<"cortex-x3", NeoverseN2Model, ProcessorFeatures.X3, [TuneX3]>; +def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4, + [TuneX4]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; def : ProcessorModel<"neoverse-n1", NeoverseN1Model, @@ -1492,12 +1649,15 @@ def : ProcessorModel<"apple-a15", CycloneModel, ProcessorFeatures.AppleA15, [TuneAppleA15]>; def : ProcessorModel<"apple-a16", CycloneModel, ProcessorFeatures.AppleA16, [TuneAppleA16]>; - +def : ProcessorModel<"apple-a17", CycloneModel, ProcessorFeatures.AppleA17, + [TuneAppleA17]>; // Mac CPUs def : 
ProcessorModel<"apple-m1", CycloneModel, ProcessorFeatures.AppleA14, [TuneAppleA14]>; def : ProcessorModel<"apple-m2", CycloneModel, ProcessorFeatures.AppleA15, [TuneAppleA15]>; +def : ProcessorModel<"apple-m3", CycloneModel, ProcessorFeatures.AppleA16, + [TuneAppleA16]>; // watch CPUs. def : ProcessorModel<"apple-s4", CycloneModel, ProcessorFeatures.AppleA12, |