diff options
author | Dimitry Andric <dim@FreeBSD.org> | 2024-03-10 12:37:50 +0000 |
---|---|---|
committer | Dimitry Andric <dim@FreeBSD.org> | 2024-04-06 20:14:25 +0000 |
commit | 4c2d3b022a1d543dbbff75a0c53e8d3d7242216d (patch) | |
tree | 2cbf757a507afe2113432d167f1464c3d457674f /contrib | |
parent | 56727255ad47072ec2cc81b4ae728a099697b0e4 (diff) | |
parent | 8b885620ff1b22914b6fac738d7b2874d94e7543 (diff) | |
download | src-4c2d3b022a1d543dbbff75a0c53e8d3d7242216d.tar.gz src-4c2d3b022a1d543dbbff75a0c53e8d3d7242216d.zip |
Merge llvm-project release/18.x llvmorg-18.1.1-0-gdba2a75e9c7e
This updates llvm, clang, compiler-rt, libc++, libunwind, lld, lldb and
openmp to llvm-project release/18.x llvmorg-18.1.1-0-gdba2a75e9c7e.
PR: 276104
MFC after: 1 month
Diffstat (limited to 'contrib')
46 files changed, 1402 insertions, 70 deletions
diff --git a/contrib/llvm-project/clang/include/clang/Sema/Sema.h b/contrib/llvm-project/clang/include/clang/Sema/Sema.h index 1f1cbd11ff73..6adb8fb7966b 100644 --- a/contrib/llvm-project/clang/include/clang/Sema/Sema.h +++ b/contrib/llvm-project/clang/include/clang/Sema/Sema.h @@ -1090,7 +1090,9 @@ public: if (FD) { FD->setWillHaveBody(true); S.ExprEvalContexts.back().InImmediateFunctionContext = - FD->isImmediateFunction(); + FD->isImmediateFunction() || + S.ExprEvalContexts[S.ExprEvalContexts.size() - 2] + .isConstantEvaluated(); S.ExprEvalContexts.back().InImmediateEscalatingFunctionContext = S.getLangOpts().CPlusPlus20 && FD->isImmediateEscalating(); } else diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp b/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp index 3036f461c1de..f5a5d689fa09 100644 --- a/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp +++ b/contrib/llvm-project/clang/lib/Basic/Targets/AArch64.cpp @@ -258,7 +258,6 @@ void AArch64TargetInfo::getTargetDefinesARMV83A(const LangOptions &Opts, MacroBuilder &Builder) const { Builder.defineMacro("__ARM_FEATURE_COMPLEX", "1"); Builder.defineMacro("__ARM_FEATURE_JCVT", "1"); - Builder.defineMacro("__ARM_FEATURE_PAUTH", "1"); // Also include the Armv8.2 defines getTargetDefinesARMV82A(Opts, Builder); } diff --git a/contrib/llvm-project/clang/lib/Basic/Targets/Mips.h b/contrib/llvm-project/clang/lib/Basic/Targets/Mips.h index f46b95abfd75..23d4e1b598fa 100644 --- a/contrib/llvm-project/clang/lib/Basic/Targets/Mips.h +++ b/contrib/llvm-project/clang/lib/Basic/Targets/Mips.h @@ -237,12 +237,14 @@ public: case 'r': // CPU registers. case 'd': // Equivalent to "r" unless generating MIPS16 code. case 'y': // Equivalent to "r", backward compatibility only. - case 'f': // floating-point registers. case 'c': // $25 for indirect jumps case 'l': // lo register case 'x': // hilo register pair Info.setAllowsRegister(); return true; + case 'f': // floating-point registers. 
+ Info.setAllowsRegister(); + return FloatABI != SoftFloat; case 'I': // Signed 16-bit constant case 'J': // Integer 0 case 'K': // Unsigned 16-bit constant diff --git a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp index 5d7c38477457..fb4e86e8bd80 100644 --- a/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/contrib/llvm-project/clang/lib/CodeGen/CodeGenPGO.cpp @@ -240,9 +240,12 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { if (MCDCMaxCond == 0) return true; - /// At the top of the logical operator nest, reset the number of conditions. - if (LogOpStack.empty()) + /// At the top of the logical operator nest, reset the number of conditions, + /// also forget previously seen split nesting cases. + if (LogOpStack.empty()) { NumCond = 0; + SplitNestedLogicalOp = false; + } if (const Expr *E = dyn_cast<Expr>(S)) { const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(E->IgnoreParens()); @@ -293,7 +296,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { "contains an operation with a nested boolean expression. " "Expression will not be covered"); Diag.Report(S->getBeginLoc(), DiagID); - return false; + return true; } /// Was the maximum number of conditions encountered? @@ -304,7 +307,7 @@ struct MapRegionCounters : public RecursiveASTVisitor<MapRegionCounters> { "number of conditions (%0) exceeds max (%1). 
" "Expression will not be covered"); Diag.Report(S->getBeginLoc(), DiagID) << NumCond << MCDCMaxCond; - return false; + return true; } // Otherwise, allocate the number of bytes required for the bitmap diff --git a/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp b/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp index 0d9c087ed0cd..4cce0abc2315 100644 --- a/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp +++ b/contrib/llvm-project/clang/lib/Sema/SemaExpr.cpp @@ -18294,7 +18294,6 @@ void Sema::CheckUnusedVolatileAssignment(Expr *E) { } void Sema::MarkExpressionAsImmediateEscalating(Expr *E) { - assert(!FunctionScopes.empty() && "Expected a function scope"); assert(getLangOpts().CPlusPlus20 && ExprEvalContexts.back().InImmediateEscalatingFunctionContext && "Cannot mark an immediate escalating expression outside of an " @@ -18311,7 +18310,8 @@ void Sema::MarkExpressionAsImmediateEscalating(Expr *E) { } else { assert(false && "expected an immediately escalating expression"); } - getCurFunction()->FoundImmediateEscalatingExpression = true; + if (FunctionScopeInfo *FI = getCurFunction()) + FI->FoundImmediateEscalatingExpression = true; } ExprResult Sema::CheckForImmediateInvocation(ExprResult E, FunctionDecl *Decl) { diff --git a/contrib/llvm-project/libcxx/include/csetjmp b/contrib/llvm-project/libcxx/include/csetjmp index d219c8e6cb22..9012cad22ebe 100644 --- a/contrib/llvm-project/libcxx/include/csetjmp +++ b/contrib/llvm-project/libcxx/include/csetjmp @@ -33,7 +33,13 @@ void longjmp(jmp_buf env, int val); #include <__assert> // all public C++ headers provide the assertion handler #include <__config> -#include <setjmp.h> +// <setjmp.h> is not provided by libc++ +#if __has_include(<setjmp.h>) +# include <setjmp.h> +# ifdef _LIBCPP_SETJMP_H +# error "If libc++ starts defining <setjmp.h>, the __has_include check should move to libc++'s <setjmp.h>" +# endif +#endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header diff --git 
a/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h b/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h index 2dc227935984..7d288ea4506b 100644 --- a/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h +++ b/contrib/llvm-project/llvm/include/llvm/ADT/iterator_range.h @@ -43,8 +43,8 @@ class iterator_range { IteratorT begin_iterator, end_iterator; public: -#if __GNUC__ == 7 - // Be careful no to break gcc-7 on the mlir target. +#if __GNUC__ == 7 || (__GNUC__ == 8 && __GNUC_MINOR__ < 4) + // Be careful not to break gcc-7 and gcc-8 < 8.4 on the mlir target. // See https://github.com/llvm/llvm-project/issues/63843 template <typename Container> #else diff --git a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td index 864f87f33838..d22eb76d2292 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td +++ b/contrib/llvm-project/llvm/include/llvm/IR/Attributes.td @@ -339,14 +339,26 @@ def UseSampleProfile : StrBoolAttr<"use-sample-profile">; def DenormalFPMath : ComplexStrAttr<"denormal-fp-math", [FnAttr]>; def DenormalFPMathF32 : ComplexStrAttr<"denormal-fp-math-f32", [FnAttr]>; +// Attribute compatibility rules are generated to check the attribute of the +// caller and callee and decide whether inlining should be allowed. CompatRule +// and child classes are used for the rule generation. CompatRule takes only a +// compare function which could be templated with the attribute type. +// CompatRuleStrAttr takes the compare function and the string attribute for +// checking compatibility for inline substitution. class CompatRule<string F> { - // The name of the function called to check the attribute of the caller and - // callee and decide whether inlining should be allowed. 
The function's - // signature must match "bool(const Function&, const Function &)", where the - // first parameter is the reference to the caller and the second parameter is - // the reference to the callee. It must return false if the attributes of the - // caller and callee are incompatible, and true otherwise. + // The function's signature must match "bool(const Function&, const + // Function&)", where the first parameter is the reference to the caller and + // the second parameter is the reference to the callee. It must return false + // if the attributes of the caller and callee are incompatible, and true + // otherwise. string CompatFunc = F; + string AttrName = ""; +} + +class CompatRuleStrAttr<string F, string Attr> : CompatRule<F> { + // The checker function is extended with a third argument as the function + // attribute string "bool(const Function&, const Function&, const StringRef&)". + string AttrName = Attr; } def : CompatRule<"isEqual<SanitizeAddressAttr>">; @@ -359,7 +371,9 @@ def : CompatRule<"isEqual<ShadowCallStackAttr>">; def : CompatRule<"isEqual<UseSampleProfileAttr>">; def : CompatRule<"isEqual<NoProfileAttr>">; def : CompatRule<"checkDenormMode">; - +def : CompatRuleStrAttr<"isEqual", "sign-return-address">; +def : CompatRuleStrAttr<"isEqual", "sign-return-address-key">; +def : CompatRuleStrAttr<"isEqual", "branch-protection-pauth-lr">; class MergeRule<string F> { // The name of the function called to merge the attributes of the caller and diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 6d82748d8004..c10f92e28717 100644 --- a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -478,7 +478,7 @@ inline constexpr ArchInfo ARMV8_1A = { VersionTuple{8, 1}, AProfile, "armv8.1-a inline constexpr ArchInfo ARMV8_2A = { 
VersionTuple{8, 2}, AProfile, "armv8.2-a", "+v8.2a", (ARMV8_1A.DefaultExts | AArch64::ExtensionBitset({AArch64::AEK_RAS}))}; inline constexpr ArchInfo ARMV8_3A = { VersionTuple{8, 3}, AProfile, "armv8.3-a", "+v8.3a", (ARMV8_2A.DefaultExts | - AArch64::ExtensionBitset({AArch64::AEK_RCPC, AArch64::AEK_JSCVT, AArch64::AEK_FCMA}))}; + AArch64::ExtensionBitset({AArch64::AEK_FCMA, AArch64::AEK_JSCVT, AArch64::AEK_PAUTH, AArch64::AEK_RCPC}))}; inline constexpr ArchInfo ARMV8_4A = { VersionTuple{8, 4}, AProfile, "armv8.4-a", "+v8.4a", (ARMV8_3A.DefaultExts | AArch64::ExtensionBitset({AArch64::AEK_DOTPROD}))}; inline constexpr ArchInfo ARMV8_5A = { VersionTuple{8, 5}, AProfile, "armv8.5-a", "+v8.5a", (ARMV8_4A.DefaultExts)}; @@ -805,6 +805,12 @@ inline constexpr CpuInfo CpuInfos[] = { {AArch64::AEK_FP16, AArch64::AEK_RAND, AArch64::AEK_SM4, AArch64::AEK_SHA3, AArch64::AEK_SHA2, AArch64::AEK_AES, AArch64::AEK_MTE, AArch64::AEK_SB, AArch64::AEK_SSBS}))}, + {"ampere1b", ARMV8_7A, + (AArch64::ExtensionBitset({AArch64::AEK_FP16, AArch64::AEK_RAND, + AArch64::AEK_SM4, AArch64::AEK_SHA3, + AArch64::AEK_SHA2, AArch64::AEK_AES, + AArch64::AEK_MTE, AArch64::AEK_SB, + AArch64::AEK_SSBS, AArch64::AEK_CSSC}))}, }; // An alias for a CPU. 
diff --git a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp index 6bf0d2f56eb4..5916d2ab48ec 100644 --- a/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm-project/llvm/lib/Analysis/Loads.cpp @@ -364,7 +364,7 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, if (Size.getBitWidth() > 64) return false; - const uint64_t LoadSize = Size.getZExtValue(); + const TypeSize LoadSize = TypeSize::getFixed(Size.getZExtValue()); // Otherwise, be a little bit aggressive by scanning the local block where we // want to check to see if the pointer is already being loaded or stored @@ -414,11 +414,11 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Align Alignment, APInt &Size, // Handle trivial cases. if (AccessedPtr == V && - LoadSize <= DL.getTypeStoreSize(AccessedTy)) + TypeSize::isKnownLE(LoadSize, DL.getTypeStoreSize(AccessedTy))) return true; if (AreEquivalentAddressValues(AccessedPtr->stripPointerCasts(), V) && - LoadSize <= DL.getTypeStoreSize(AccessedTy)) + TypeSize::isKnownLE(LoadSize, DL.getTypeStoreSize(AccessedTy))) return true; } return false; diff --git a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp index fd5160209506..19076771ff2e 100644 --- a/contrib/llvm-project/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm-project/llvm/lib/IR/Attributes.cpp @@ -2045,6 +2045,11 @@ static bool isEqual(const Function &Caller, const Function &Callee) { Callee.getFnAttribute(AttrClass::getKind()); } +static bool isEqual(const Function &Caller, const Function &Callee, + const StringRef &AttrName) { + return Caller.getFnAttribute(AttrName) == Callee.getFnAttribute(AttrName); +} + /// Compute the logical AND of the attributes of the caller and the /// callee. 
/// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td index 36700f73df4b..feabd137c0cf 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64.td @@ -837,6 +837,7 @@ include "AArch64SchedA64FX.td" include "AArch64SchedThunderX3T110.td" include "AArch64SchedTSV110.td" include "AArch64SchedAmpere1.td" +include "AArch64SchedAmpere1B.td" include "AArch64SchedNeoverseN1.td" include "AArch64SchedNeoverseN2.td" include "AArch64SchedNeoverseV1.td" @@ -1376,6 +1377,24 @@ def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", FeatureLdpAlignedOnly, FeatureStpAlignedOnly]>; +def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B", + "Ampere Computing Ampere-1B processors", [ + FeaturePostRAScheduler, + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeatureAddrLSLFast, + FeatureALULSLFast, + FeatureAggressiveFMA, + FeatureArithmeticBccFusion, + FeatureCmpBccFusion, + FeatureFuseAddress, + FeatureFuseLiterals, + FeatureStorePairSuppress, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive, + FeatureLdpAlignedOnly, + FeatureStpAlignedOnly]>; + def ProcessorFeatures { list<SubtargetFeature> A53 = [HasV8_0aOps, FeatureCRC, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeaturePerfMon]; @@ -1529,6 +1548,11 @@ def ProcessorFeatures { FeatureMTE, FeatureSSBS, FeatureRandGen, FeatureSB, FeatureSM4, FeatureSHA2, FeatureSHA3, FeatureAES]; + list<SubtargetFeature> Ampere1B = [HasV8_7aOps, FeatureNEON, FeaturePerfMon, + FeatureMTE, FeatureSSBS, FeatureRandGen, + FeatureSB, FeatureSM4, FeatureSHA2, + FeatureSHA3, FeatureAES, FeatureCSSC, + FeatureWFxT, FeatureFullFP16]; // ETE and TRBE are future architecture extensions. We temporarily enable them // by default for users targeting generic AArch64. 
The extensions do not @@ -1696,6 +1720,9 @@ def : ProcessorModel<"ampere1", Ampere1Model, ProcessorFeatures.Ampere1, def : ProcessorModel<"ampere1a", Ampere1Model, ProcessorFeatures.Ampere1A, [TuneAmpere1A]>; +def : ProcessorModel<"ampere1b", Ampere1BModel, ProcessorFeatures.Ampere1B, + [TuneAmpere1B]>; + //===----------------------------------------------------------------------===// // Assembly parser //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td index 3e4168f5f445..c714bad92b7f 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -29,7 +29,7 @@ def CortexA53Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td index 277ec772cf0f..ebbc3b72b506 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -34,7 +34,7 @@ def CortexA57Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td index 7edce4b61605..d6fe84a2c9c9 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td @@ -22,7 +22,8 @@ def A64FXModel : SchedMachineModel { 
list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, SVEUnsupported.F, [HasMTE, HasMatMulInt8, HasBF16, - HasPAuth, HasPAuthLR, HasCPA]); + HasPAuth, HasPAuthLR, HasCPA, + HasCSSC]); let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td new file mode 100644 index 000000000000..9c4f000cf351 --- /dev/null +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1B.td @@ -0,0 +1,1149 @@ +//=- AArch64SchedAmpere1B.td - Ampere-1B scheduling def -----*- tablegen -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the Ampere Computing Ampere-1B to +// support instruction scheduling and other instruction cost heuristics. +// +//===----------------------------------------------------------------------===// + +// The Ampere-1B core is an out-of-order micro-architecture. The front +// end has branch prediction, with a 10-cycle recovery time from a +// mispredicted branch. Instructions coming out of the front end are +// decoded into internal micro-ops (uops). + +def Ampere1BModel : SchedMachineModel { + let IssueWidth = 12; // Maximum micro-ops dispatch rate. 
+ let MicroOpBufferSize = 192; // micro-op re-order buffer size + let LoadLatency = 3; // Optimistic load latency + let MispredictPenalty = 10; // Branch mispredict penalty + let LoopMicroOpBufferSize = 32; // Instruction queue size + let CompleteModel = 1; + + list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, + SMEUnsupported.F, + PAUnsupported.F); +} + +let SchedModel = Ampere1BModel in { + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Ampere-1B. + +def Ampere1BUnitA : ProcResource<2>; // integer single-cycle, branch, and flags r/w +def Ampere1BUnitB : ProcResource<2>; // integer single-cycle, and complex shifts +def Ampere1BUnitBS : ProcResource<1>; // integer multi-cycle +def Ampere1BUnitL : ProcResource<2>; // load +def Ampere1BUnitS : ProcResource<2>; // store address calculation +def Ampere1BUnitX : ProcResource<1>; // FP and vector operations, and flag write +def Ampere1BUnitY : ProcResource<1>; // FP and vector operations, and crypto +def Ampere1BUnitZ : ProcResource<1>; // FP store data and FP-to-integer moves + +def Ampere1BUnitAB : ProcResGroup<[Ampere1BUnitA, Ampere1BUnitB]>; +def Ampere1BUnitXY : ProcResGroup<[Ampere1BUnitX, Ampere1BUnitY]>; + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Ampere-1B. 
+ +def Ampere1BWrite_1cyc_1A : SchedWriteRes<[Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_2A : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1B : SchedWriteRes<[Ampere1BUnitB]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1BS_1B : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitB]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1AB : SchedWriteRes<[Ampere1BUnitAB]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1AB_1A : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitA]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_1cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_1S : SchedWriteRes<[Ampere1BUnitS]> { + let Latency = 1; + let NumMicroOps = 1; +} + +def Ampere1BWrite_1cyc_2S : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1Y : SchedWriteRes<[Ampere1BUnitY]> { + let Latency = 2; + let NumMicroOps = 1; +} + +def Ampere1BWrite_2cyc_2AB : SchedWriteRes<[Ampere1BUnitAB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1AB : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1S : SchedWriteRes<[Ampere1BUnitB, Ampere1BUnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_2cyc_1B_1S_1AB : SchedWriteRes<[Ampere1BUnitB, + Ampere1BUnitS, + Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1BWrite_2cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ, + Ampere1BUnitZ]> { + let Latency = 2; + let NumMicroOps = 3; +} + +def Ampere1BWrite_2cyc_1XY : 
SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 2; + let NumMicroOps = 1; +} + +def Ampere1BWrite_2cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitZ]> { + let Latency = 2; + let NumMicroOps = 2; +} + +def Ampere1BWrite_3cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1Z : SchedWriteRes<[Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 1; +} + +def Ampere1BWrite_3cyc_1S_1Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 2; +} + +def Ampere1BWrite_3cyc_1S_2Z : SchedWriteRes<[Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 3; +} + +def Ampere1BWrite_3cyc_2S_2Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 3; + let NumMicroOps = 4; +} + +def Ampere1BWrite_4cyc_1BS_1AB : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitAB]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1L : SchedWriteRes<[Ampere1BUnitL]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_2L : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1L_1B : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitB]> { + let Latency = 4; + let NumMicroOps = 2; +} + +def Ampere1BWrite_4cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_1XY : SchedWriteRes<[Ampere1BUnitXY]> { + let Latency = 4; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 4; 
+ let NumMicroOps = 2; +} + +def Ampere1BWrite_5cyc_1BS : SchedWriteRes<[Ampere1BUnitBS]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1BWrite_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1BUnitXY, + Ampere1BUnitS, + Ampere1BUnitZ]> { + let Latency = 4; + let NumMicroOps = 3; +} + +def Ampere1BWrite_4cyc_3S_3Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 4; + let NumMicroOps = 6; +} + +def Ampere1BWrite_5cyc_4S_4Z : SchedWriteRes<[Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 5; + let NumMicroOps = 8; +} + +def Ampere1BWrite_5cyc_1L_1BS : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitBS]> { + let Latency = 5; + let NumMicroOps = 2; +} + +def Ampere1BWrite_5cyc_3L : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL]> { + let Latency = 5; + let NumMicroOps = 3; +} + +def Ampere1BWrite_5cyc_4L : SchedWriteRes<[Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL, + Ampere1BUnitL]> { + let Latency = 5; + let NumMicroOps = 4; +} + +def Ampere1BWrite_5cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 5; + let NumMicroOps = 1; +} + +def Ampere1BWrite_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 5; + let NumMicroOps = 6; +} + +def Ampere1BWrite_6cyc_1BS_1A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_1BS_2A : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitA, + Ampere1BUnitA]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def Ampere1BWrite_6cyc_1L_1XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_2L_2XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps 
= 4; +} + +def Ampere1BWrite_6cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 2; +} + +def Ampere1BWrite_6cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY]> { + let Latency = 6; + let NumMicroOps = 3; +} + +def Ampere1BWrite_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 6; + let NumMicroOps = 6; +} + +def Ampere1BWrite_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 6; + let NumMicroOps = 9; +} + +def Ampere1BWrite_7cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_1XY_1Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_1X_1Z : SchedWriteRes<[Ampere1BUnitX, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 2; +} + +def Ampere1BWrite_7cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 6; +} + +def Ampere1BWrite_7cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 7; + let NumMicroOps = 8; +} + +def Ampere1BWrite_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 7; + let NumMicroOps = 12; +} + +def Ampere1BWrite_8cyc_1BS_1L : 
SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitL]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_1BS_1XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_2L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 5; +} + +def Ampere1BWrite_8cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 6; +} + +def Ampere1BWrite_8cyc_4L_4XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 8; +} + +def Ampere1BWrite_8cyc_2XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 2; +} + +def Ampere1BWrite_8cyc_4XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 8; + let NumMicroOps = 4; +} + +def Ampere1BWrite_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitS, Ampere1BUnitS, + Ampere1BUnitZ, Ampere1BUnitZ, + Ampere1BUnitZ, Ampere1BUnitZ]> { + let Latency = 9; + let NumMicroOps = 14; +} + +def Ampere1BWrite_9cyc_1A_1BS_1X : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitX]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_9cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1BUnitA, Ampere1BUnitBS, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_9cyc_3L_3XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 6; +} + +def Ampere1BWrite_9cyc_1X : SchedWriteRes<[Ampere1BUnitX]> 
{ + let Latency = 9; + let NumMicroOps = 1; +} + +def Ampere1BWrite_9cyc_3XY : SchedWriteRes<[Ampere1BUnitXY, Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 9; + let NumMicroOps = 3; +} + +def Ampere1BWrite_10cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 10; + let NumMicroOps = 12; +} + +def Ampere1BWrite_11cyc_1BS_2XY : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 11; + let NumMicroOps = 3; +} + +def Ampere1BWrite_11cyc_4L_8XY : SchedWriteRes<[Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitL, Ampere1BUnitL, + Ampere1BUnitXY, Ampere1BUnitXY, + Ampere1BUnitXY, Ampere1BUnitXY]> { + let Latency = 11; + let NumMicroOps = 12; +} + +def Ampere1BWrite_12cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 12; + let NumMicroOps = 1; +} + +def Ampere1BWrite_13cyc_1BS_1X : SchedWriteRes<[Ampere1BUnitBS, Ampere1BUnitX]> { + let Latency = 13; + let NumMicroOps = 2; +} + +def Ampere1BWrite_17cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 17; + let NumMicroOps = 1; +} + +def Ampere1BWrite_19cyc_2BS_1X : SchedWriteRes<[Ampere1BUnitBS, + Ampere1BUnitBS, + Ampere1BUnitX]> { + let Latency = 13; + let NumMicroOps = 3; +} + +def Ampere1BWrite_19cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 19; + let NumMicroOps = 1; +} + +def Ampere1BWrite_21cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 21; + let NumMicroOps = 1; +} + +def Ampere1BWrite_33cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 33; + let NumMicroOps = 1; +} + +def Ampere1BWrite_39cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 39; + let NumMicroOps = 1; +} + +def Ampere1BWrite_63cyc_1X : SchedWriteRes<[Ampere1BUnitX]> { + let Latency = 63; + let NumMicroOps = 1; +} + +// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4), +// which are a single uop, and for extended 
registers, which have full flexibility +// across Unit A or B for both uops. +def Ampere1BWrite_Arith : SchedWriteVariant<[ + SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>, + SchedVar<IsCheapLSL, [Ampere1BWrite_1cyc_1AB]>, + SchedVar<NoSchedPred, [Ampere1BWrite_2cyc_1B_1AB]>]>; + +def Ampere1BWrite_ArithFlagsetting : SchedWriteVariant<[ + SchedVar<RegExtendedPred, [Ampere1BWrite_2cyc_2AB]>, + SchedVar<IsCheapLSL, [Ampere1BWrite_1cyc_1AB]>, + SchedVar<NoSchedPred, [Ampere1BWrite_2cyc_1B_1AB]>]>; + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latencies for Ampere-1B. +// This provides a coarse model, which is then specialised below. + +def : WriteRes<WriteImm, [Ampere1BUnitAB]>; // MOVN, MOVZ +def : WriteRes<WriteI, [Ampere1BUnitAB]>; // ALU +def : WriteRes<WriteISReg, [Ampere1BUnitB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} // ALU of Shifted-Reg +def : WriteRes<WriteIEReg, [Ampere1BUnitAB, Ampere1BUnitAB]> { + let Latency = 2; + let NumMicroOps = 2; +} // ALU of Extended-Reg +def : WriteRes<WriteExtr, [Ampere1BUnitB]>; // EXTR shifts a reg pair +def : WriteRes<WriteIS, [Ampere1BUnitB]>; // Shift/Scale +def : WriteRes<WriteID32, [Ampere1BUnitBS, Ampere1BUnitX]> { + let Latency = 13; +} // 32-bit Divide +def : WriteRes<WriteID64, [Ampere1BUnitBS, Ampere1BUnitX]> { + let Latency = 19; +} // 64-bit Divide +def : WriteRes<WriteIM32, [Ampere1BUnitBS]> { + let Latency = 3; +} // 32-bit Multiply +def : WriteRes<WriteIM64, [Ampere1BUnitBS, Ampere1BUnitAB]> { + let Latency = 3; +} // 64-bit Multiply +def : WriteRes<WriteBr, [Ampere1BUnitA]>; +def : WriteRes<WriteBrReg, [Ampere1BUnitA, Ampere1BUnitA]>; +def : WriteRes<WriteLD, [Ampere1BUnitL]> { + let Latency = 3; +} // Load from base addr plus immediate offset +def : WriteRes<WriteST, [Ampere1BUnitS]> { + let Latency = 1; +} // Store to base addr plus immediate offset +def : WriteRes<WriteSTP, 
[Ampere1BUnitS, Ampere1BUnitS]> { + let Latency = 1; + let NumMicroOps = 1; +} // Store a register pair. +def : WriteRes<WriteAdr, [Ampere1BUnitAB]>; +def : WriteRes<WriteLDIdx, [Ampere1BUnitAB, Ampere1BUnitS]> { + let Latency = 3; + let NumMicroOps = 1; +} // Load from a register index (maybe scaled). NOTE(review): lists UnitS rather than UnitL for a load - confirm. +def : WriteRes<WriteSTIdx, [Ampere1BUnitS, Ampere1BUnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} // Store to a register index (maybe scaled). +def : WriteRes<WriteF, [Ampere1BUnitXY]> { + let Latency = 2; +} // General floating-point ops. +def : WriteRes<WriteFCmp, [Ampere1BUnitX]> { + let Latency = 3; +} // Floating-point compare. +def : WriteRes<WriteFCvt, [Ampere1BUnitXY]> { + let Latency = 3; +} // Float conversion. +def : WriteRes<WriteFCopy, [Ampere1BUnitXY]> { +} // Float-int register copy. +def : WriteRes<WriteFImm, [Ampere1BUnitXY]> { + let Latency = 2; +} // Floating-point immediate. +def : WriteRes<WriteFMul, [Ampere1BUnitXY]> { + let Latency = 4; +} // Floating-point multiply. +def : WriteRes<WriteFDiv, [Ampere1BUnitXY]> { + let Latency = 19; +} // Floating-point division. +def : WriteRes<WriteVd, [Ampere1BUnitXY]> { + let Latency = 3; +} // 64bit Vector D ops. +def : WriteRes<WriteVq, [Ampere1BUnitXY]> { + let Latency = 3; +} // 128bit Vector Q ops. +def : WriteRes<WriteVLD, [Ampere1BUnitL, Ampere1BUnitL]> { + let Latency = 4; +} // Vector loads. +def : WriteRes<WriteVST, [Ampere1BUnitS, Ampere1BUnitZ]> { + let Latency = 2; +} // Vector stores. + +def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } + +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +def : WriteRes<WriteLDHi, []> { + let Latency = 3; +} // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP + +// Forwarding logic. 
+def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 1, [WriteIM32, WriteIM64]>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadST, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + +//===----------------------------------------------------------------------===// +// Specialising the scheduling model further for Ampere-1B. + +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs COPY)>; + +// Branch instructions +def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs Bcc, BL, RET)>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>; +def : InstRW<[Ampere1BWrite_1cyc_2A], (instrs BLR)>; + +// Common Short Sequence Compression (CSSC) +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instregex "^ABS[WX]")>; +def : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CNT[WX]")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "^CTZ[WX]")>; +def : InstRW<[Ampere1BWrite_1cyc_1AB_1A], (instregex "^[SU](MAX|MIN)[WX]")>; + +// Cryptography instructions +// -- AES encryption/decryption +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AES[DE]")>; +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^AESI?MC")>; +// -- Polynomial multiplication +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^PMUL", "^PMULL")>; +// -- SHA-256 hash +def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA256(H|H2)")>; +// -- SHA-256 schedule update +def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA256SU[01]")>; +// -- SHA-3 instructions +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>; +// -- SHA-512 hash +def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex "^SHA512(H|H2)")>; +// -- SHA-512 schedule update +def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA512SU[01]")>; +// -- SHA1 choose/majority/parity +def : InstRW<[Ampere1BWrite_4cyc_1X], (instregex 
"^SHA1[CMP]")>; +// -- SHA1 hash/schedule update +def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1SU[01]")>; +def : InstRW<[Ampere1BWrite_2cyc_1Y], (instregex "^SHA1H")>; +// -- SM3 hash +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SM3PARTW[12]$", "^SM3SS1$", "^SM3TT[12][AB]$")>; +def : InstRW<[Ampere1BWrite_4cyc_1X], (instrs SM4E, SM4ENCKEY)>; + +// FP and vector load instructions +// -- Load 1-element structure to one/all lanes +// ---- all lanes +def : InstRW<[Ampere1BWrite_6cyc_1L_1XY], + (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// ---- one lane +def : InstRW<[Ampere1BWrite_6cyc_1L_1XY], + (instregex "^LD1i(8|16|32|64)")>; +// -- Load 1-element structure to one/all lanes, 1D size +def : InstRW<[Ampere1BWrite_4cyc_1L], + (instregex "^LD1Rv1d")>; +// -- Load 1-element structures to 1 register +def : InstRW<[Ampere1BWrite_4cyc_1L], + (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 2 registers +def : InstRW<[Ampere1BWrite_4cyc_2L], + (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 3 registers +def : InstRW<[Ampere1BWrite_5cyc_3L], + (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 1-element structures to 4 registers +def : InstRW<[Ampere1BWrite_5cyc_4L], + (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Load 2-element structure to all lanes of 2 registers, 1D size +def : InstRW<[Ampere1BWrite_4cyc_2L], + (instregex "^LD2Rv1d")>; +// -- Load 2-element structure to all lanes of 2 registers, other sizes +def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], + (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 2-element structure to one lane of 2 registers +def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], + (instregex "^LD2i(8|16|32|64)")>; +// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size +def : InstRW<[Ampere1BWrite_6cyc_2L_2XY], + (instregex "^LD2Twov(16b|8h|4s|2d)")>; +// -- Load 2-element structures to 2 registers, 8B/4H/2S size +def 
: InstRW<[Ampere1BWrite_8cyc_2L_3XY], + (instregex "^LD2Twov(8b|4h|2s)")>; +// -- Load 3-element structure to all lanes of 3 registers, 1D size +def : InstRW<[Ampere1BWrite_5cyc_3L], + (instregex "^LD3Rv1d")>; +// -- Load 3-element structure to all lanes of 3 registers, other sizes +def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], + (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 3-element structure to one lane of 3 registers +def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], + (instregex "^LD3i(8|16|32|64)")>; +// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes +def : InstRW<[Ampere1BWrite_8cyc_3L_3XY], + (instregex "^LD3Threev(16b|8h|4s)")>; +// -- Load 3-element structures to 3 registers, 2D size +def : InstRW<[Ampere1BWrite_7cyc_3L_3XY], + (instregex "^LD3Threev2d")>; +// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1BWrite_9cyc_3L_3XY], + (instregex "^LD3Threev(8b|4h|2s)")>; +// -- Load 4-element structure to all lanes of 4 registers, 1D size +def : InstRW<[Ampere1BWrite_5cyc_4L], + (instregex "^LD4Rv1d")>; +// -- Load 4-element structure to all lanes of 4 registers, other sizes +def : InstRW<[Ampere1BWrite_7cyc_4L_4XY], + (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>; +// -- Load 4-element structure to one lane of 4 registers +def : InstRW<[Ampere1BWrite_7cyc_4L_4XY], + (instregex "^LD4i(8|16|32|64)")>; +// -- Load 4-element structures to 4 registers, 2D size +def : InstRW<[Ampere1BWrite_8cyc_4L_4XY], + (instregex "^LD4Fourv2d")>; +// -- Load 4-element structures to 4 registers, 2S size +def : InstRW<[Ampere1BWrite_11cyc_4L_8XY], + (instregex "^LD4Fourv2s")>; +// -- Load 4-element structures to 4 registers, other sizes +def : InstRW<[Ampere1BWrite_10cyc_4L_8XY], + (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>; +// -- Load pair, Q-form +def : InstRW<[Ampere1BWrite_4cyc_2L], (instregex "LDN?PQ")>; +// -- Load pair, S/D-form +def : InstRW<[Ampere1BWrite_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>; +// -- Load register +def : 
InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDU?R[BHSDQ]i")>; +// -- Load register, sign-extended register +def : InstRW<[Ampere1BWrite_4cyc_1L], (instregex "LDR[BHSDQ]ro(W|X)")>; + +// FP and vector store instructions +// -- Store 1-element structure from one lane of 1 register +def : InstRW<[Ampere1BWrite_4cyc_1XY_1S_1Z], + (instregex "^ST1i(8|16|32|64)")>; +// -- Store 1-element structures from 1 register +def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], + (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 2 registers +def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], + (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 3 registers +def : InstRW<[Ampere1BWrite_4cyc_3S_3Z], + (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 1-element structures from 4 registers +def : InstRW<[Ampere1BWrite_5cyc_4S_4Z], + (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 2-element structure from one lane of 2 registers +def : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], + (instregex "^ST2i(8|16|32|64)")>; +// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes +def : InstRW<[Ampere1BWrite_5cyc_2XY_2S_2Z], + (instregex "^ST2Twov(16b|8h|4s|2d)")>; +// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1BWrite_6cyc_2XY_2S_2Z], + (instregex "^ST2Twov(8b|4h|2s)")>; +// -- Store 3-element structure from one lane of 3 registers +def : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], + (instregex "^ST3i(8|16|32|64)")>; +// -- Store 3-element structures from 3 registers +def : InstRW<[Ampere1BWrite_6cyc_3XY_3S_3Z], + (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>; +// -- Store 4-element structure from one lane of 4 registers +def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], + (instregex "^ST4i(8|16|32|64)")>; +// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes +def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], + (instregex "^ST4Fourv(16b|8h|4s)")>; +// -- 
Store 4-element structures from 4 registers, 2D sizes +def : InstRW<[Ampere1BWrite_7cyc_4XY_4S_4Z], + (instregex "^ST4Fourv2d")>; +// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes +def : InstRW<[Ampere1BWrite_9cyc_6XY_4S_4Z], + (instregex "^ST4Fourv(8b|4h|2s)")>; +// -- Store pair, Q-form +def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?PQ")>; +// -- Store pair, S/D-form +def : InstRW<[Ampere1BWrite_3cyc_2S_2Z], (instregex "^STN?P[SD]")>; +// -- Store register +def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>; +// -- Store register, sign-extended register offset +def : InstRW<[Ampere1BWrite_2cyc_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>; + +// FP data processing, bfloat16 format +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFCVT)>; +def : InstRW<[Ampere1BWrite_8cyc_2XY], (instrs BFCVTN, BFCVTN2)>; +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instrs BFMMLA)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^BFMLAL")>; + +// FP data processing, scalar/vector, half precision +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>; +def : InstRW<[Ampere1BWrite_3cyc_1X], + (instregex "^FCMPE?H")>; +def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], + (instregex "^FCCMPE?H")>; +def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], + (instregex "^FCSELH")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>; +// Convert FP to integer, H-form +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi]16")>; +// Convert to FP from GPR, H-form +def : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]toH$")>; +// 
Convert to FP from GPR, fixed-point, H-form +def : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX]Hri$")>; +def : InstRW<[Ampere1BWrite_9cyc_1X], (instrs FDIVHrr)>; +def : InstRW<[Ampere1BWrite_17cyc_1X], (instregex "^FDIVv.[if]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>; +def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>; +def : InstRW<[Ampere1BWrite_9cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if]16")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX16)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if]16")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>; +// FP square root, H-form +def : InstRW<[Ampere1BWrite_21cyc_1X], (instrs FSQRTHr)>; +// FP square root, vector-form, F16 +def : InstRW<[Ampere1BWrite_39cyc_1X], (instregex "^FSQRTv.f16")>; + +// FP data processing, scalar/vector, single/double precision +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1X], + (instregex "^FCMPE?(S|D)")>; +def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1X], + (instregex "^FCCMPE?(S|D)")>; +def : InstRW<[Ampere1BWrite_9cyc_1A_1BS_1XY], + (instregex "^FCSEL(S|D)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>; +// Convert FP to integer, S/D-form +def : 
InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^[SUd]CVTFv.[fi](32|64)")>; +// Convert to FP from GPR, S/D-form +def : InstRW<[Ampere1BWrite_8cyc_1BS_1XY], (instregex "^[SU]CVTF_ZPmZ_[DSH]to[DS]$")>; +// Convert to FP from GPR, fixed-point, S/D-form +def : InstRW<[Ampere1BWrite_11cyc_1BS_2XY], (instregex "^[SU]CVTF[SU][WX][SD]ri$")>; +def : InstRW<[Ampere1BWrite_19cyc_1X], (instregex "^FDIVv.[if](64)", "FDIVD")>; +def : InstRW<[Ampere1BWrite_12cyc_1X], (instregex "^FDIVv.[if](32)", "FDIVS")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULX32, FMULX64)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULSrr, FNMULSrr)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instrs FMULDrr, FNMULDrr)>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>; +def : InstRW<[Ampere1BWrite_4cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FRINT(32|64)")>; +def : InstRW<[Ampere1BWrite_63cyc_1X], (instregex "^FSQRTv.f64", "^FSQRTDr")>; +def : InstRW<[Ampere1BWrite_33cyc_1X], (instregex "^FSQRTv.f32", "^FSQRTSr")>; + +// FP miscellaneous instructions +def : InstRW<[Ampere1BWrite_7cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD]Hr")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT[HSD][SD]r")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVTLv")>; +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^FCVT(N|XN)v")>; +def : 
InstRW<[Ampere1BWrite_7cyc_1X_1Z], (instrs FJCVTZS)>; +def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>; +def : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>; +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>; +def : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "^FMOVXDHighr")>; +def : InstRW<[Ampere1BWrite_3cyc_1Z], (instregex "^FMOV[WX][HSD]r")>; + +// Integer arithmetic and logical instructions +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "ADC(W|X)r", "SBC(W|X)r")>; +def : InstRW<[Ampere1BWrite_Arith], + (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[sx]")>; +def : InstRW<[Ampere1BWrite_1cyc_1AB], + (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)[WX]r[ri]")>; +def : InstRW<[Ampere1BWrite_ArithFlagsetting], + (instregex "(ADD|AND|BIC|SUB)S[WX]r[sx]")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "(ADD|AND|BIC|SUB)S[WX]r[ri]")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "(ADC|SBC)S[WX]r")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs RMIF)>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "(CCMN|CCMP)(X|W)")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], + (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>; +def : InstRW<[Ampere1BWrite_13cyc_1BS_1X], (instrs SDIVWr, UDIVWr)>; +def : InstRW<[Ampere1BWrite_19cyc_2BS_1X], (instrs SDIVXr, UDIVXr)>; +def : InstRW<[Ampere1BWrite_3cyc_1BS], + (instregex "(S|U)MULHr")>; +def : InstRW<[Ampere1BWrite_4cyc_1BS_1AB], + (instregex "(S|U)?M(ADD|SUB)L?r")>; + +// Integer load instructions +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "(LDNP|LDP|LDPSW)(X|W)")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDR(B|D|H|Q|S)ui")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDR(D|Q|W|X)l")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDTR(B|H|W|X)i")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDTRS(BW|BX|HW|HX|W)i")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDUR(BB|HH|X|W)i")>; +def : 
InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDURS(BW|BX|HW|HX|W)i")>; +def : InstRW<[Ampere1BWrite_3cyc_1L], + (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>; +def : InstRW<[Ampere1BWrite_1cyc_1L], + (instrs PRFMl, PRFMui, PRFUMi)>; // Prefetch: literal, unsigned-offset and unscaled-offset forms. +def : InstRW<[Ampere1BWrite_1cyc_1L], + (instrs PRFMroW, PRFMroX)>; + +// Integer miscellaneous instructions +def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs ADR, ADRP)>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "EXTR(W|X)")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>; +def : InstRW<[Ampere1BWrite_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instregex "CLS(W|X)")>; +def : InstRW<[Ampere1BWrite_1cyc_1A], (instrs SETF8, SETF16)>; +def : InstRW<[Ampere1BWrite_1cyc_1AB], + (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; +def : InstRW<[Ampere1BWrite_1cyc_1B], + (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], + (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>; + +// Integer store instructions +def : InstRW<[Ampere1BWrite_1cyc_2S], (instregex "STNP(X|W)i")>; +def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STPXi)>; +def : InstRW<[Ampere1BWrite_2cyc_1B_1S], (instrs STPWi)>; +def : InstRW<[Ampere1BWrite_2cyc_1B_1S_1AB], (instregex "STP(W|X)(pre|post)")>; +def : InstRW<[Ampere1BWrite_1cyc_1S], (instrs STTRBi, STTRHi, STTRWi, STTRXi)>; +def : InstRW<[Ampere1BWrite_1cyc_1S], (instregex "STUR(BB|HH|X|W)i", + "STR(X|W)ui", + "STUR(BB|HH|X|W)i")>; // NOTE(review): the STUR pattern is listed twice - confirm which form was intended. +def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroX, STRXroX)>; +def : InstRW<[Ampere1BWrite_1cyc_2S], (instrs STRWroW, STRXroW)>; + +// Memory tagging + +// Insert Random Tags +def : InstRW<[Ampere1BWrite_1cyc_1BS_1B], (instrs IRG, IRGstack)>; +// Load allocation tag +def : InstRW<[Ampere1BWrite_4cyc_1L_1B], (instrs LDG, LDGM)>; +// Store allocation tags +def : InstRW<[Ampere1BWrite_1cyc_1S], + (instrs STGi, STGM, STGPreIndex, STGPostIndex)>; +// Store allocation tags and pair of registers +def : 
InstRW<[Ampere1BWrite_1cyc_2S], + (instrs STGPi, STGPpre, STGPpost)>; +// Store allocation tags and zero data +def : InstRW<[Ampere1BWrite_1cyc_1S], + (instrs STZGi, STZGM, STZGPreIndex, STZGPostIndex)>; +// Store two tags +def : InstRW<[Ampere1BWrite_1cyc_2S], + (instrs ST2Gi, ST2GPreIndex, ST2GPostIndex)>; +// Store two tags and zero data +def : InstRW<[Ampere1BWrite_1cyc_2S], + (instrs STZ2Gi, STZ2GPreIndex, STZ2GPostIndex)>; +// Subtract Pointer +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBP)>; +// Subtract Pointer, flagset +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs SUBPS)>; +// Insert Tag Mask +def : InstRW<[Ampere1BWrite_1cyc_1AB], (instrs GMI)>; +// Arithmetic, immediate to logical address tag +def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs ADDG, SUBG)>; + +// Pointer authentication +def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^AUT")>; +def : InstRW<[Ampere1BWrite_6cyc_1BS_1A], + (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>; +def : InstRW<[Ampere1BWrite_6cyc_1BS_2A], + (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>; +def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^PAC")>; +def : InstRW<[Ampere1BWrite_8cyc_1BS_1L], (instregex "^LDRA(A|B)")>; +def : InstRW<[Ampere1BWrite_1cyc_1B], (instrs XPACD, XPACI)>; + +// Vector integer instructions +// -- absolute difference +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv", + "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>; +// -- arithmetic +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD", + "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW", + "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>; +// -- arithmetic, horizontal, 16B +def : InstRW<[Ampere1BWrite_8cyc_4XY], + (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>; +def : InstRW<[Ampere1BWrite_8cyc_4XY], + (instregex "^[SU](MIN|MAX)Vv16i8v")>; +// -- arithmetic, horizontal, 4H/4S +def : InstRW<[Ampere1BWrite_4cyc_2XY], + 
(instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>; +def : InstRW<[Ampere1BWrite_4cyc_2XY], + (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>; +// -- arithmetic, horizontal, 8B/8H +def : InstRW<[Ampere1BWrite_6cyc_3XY], + (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>; +def : InstRW<[Ampere1BWrite_6cyc_3XY], + (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>; +// -- arithmetic, narrowing +def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(ADD|SUB)HNv.*")>; +def : InstRW<[Ampere1BWrite_6cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>; +// -- arithmetic, pairwise +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>; +// -- arithmetic, saturating +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>; +// -- bit count +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^(CLS|CLZ|CNT)v")>; +// -- compare +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv", + "^CMHIv", "^CMHSv")>; +// -- compare non-zero +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^CMTSTv")>; +// -- dot product +def : InstRW<[Ampere1BWrite_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>; +// -- fp reciprocal estimate +def : InstRW<[Ampere1BWrite_6cyc_1X], (instregex "^FRECPEv", "^FRSQRTEv")>; +// -- integer reciprocal estimate +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>; +// -- logical +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; +// -- logical, narrowing +def : InstRW<[Ampere1BWrite_6cyc_2XY], + (instregex "RSHRNv", + "SHRNv", "SQSHRNv", "SQSHRUNv", + "UQXTNv")>; +// -- matrix multiply +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instrs SMMLA, UMMLA, USMMLA)>; +// -- max/min +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", 
"^UMINPv")>; +// -- move immediate +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>; +// -- multiply +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>; +// -- multiply accumulate +def : InstRW<[Ampere1BWrite_3cyc_1XY], + (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>; +// -- negation, saturating +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>; +// -- reverse bits/bytes +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>; +// -- shift +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; +// -- shift and accumulate +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>; +// -- shift, saturating +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU", + "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL", + "^UQSHL")>; + +// Vector miscellaneous instructions +// -- duplicate element +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^DUPv.+lane")>; +// -- duplicate from GPR +def : InstRW<[Ampere1BWrite_5cyc_1BS], (instregex "^DUPv.+gpr")>; +// -- extract narrow +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^XTNv")>; +// -- insert/extract element +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>; +// -- move FP immediate +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^FMOVv")>; +// -- move element to GPR +def : InstRW<[Ampere1BWrite_5cyc_1X], (instregex "(S|U)MOVv")>; +// -- move from GPR to any element +def : InstRW<[Ampere1BWrite_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>; +// -- table lookup +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; +def : InstRW<[Ampere1BWrite_4cyc_2XY], + (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; +def : 
InstRW<[Ampere1BWrite_6cyc_3XY], + (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; +def : InstRW<[Ampere1BWrite_8cyc_4XY], + (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; +// -- transpose +def : InstRW<[Ampere1BWrite_2cyc_1XY], + (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; +// -- zip/unzip +def : InstRW<[Ampere1BWrite_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>; + +} // SchedModel = Ampere1BModel diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td index 1ef3a2a06338..48324654949c 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedCyclone.td @@ -21,7 +21,7 @@ def CycloneModel : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td index 2127a34a58d5..6fc4ec3ae41b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM3.td @@ -27,7 +27,7 @@ def ExynosM3Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td index 83cf56088d4c..5163de280f2e 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td +++ 
b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM4.td @@ -27,7 +27,7 @@ def ExynosM4Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td index 85058af86dec..2ccbe1614dcd 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedExynosM5.td @@ -27,7 +27,7 @@ def ExynosM5Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td index a765cd1cdfe3..e9172e82b099 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkor.td @@ -26,7 +26,7 @@ def FalkorModel : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. 
let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td index 3551066ee7c3..258b34c38898 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedKryo.td @@ -30,7 +30,7 @@ def KryoModel : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td index 2ec9600f84f7..524fa33f498b 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN1.td @@ -25,7 +25,7 @@ def NeoverseN1Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(PAUnsupported.F, SMEUnsupported.F, SVEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index a6fab5e6245f..8ec124954362 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -19,7 +19,7 @@ def NeoverseN2Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, - [HasSVE2p1, HasPAuthLR, HasCPA]); + [HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]); } //===----------------------------------------------------------------------===// diff --git 
a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td index 75fbb85dce9d..613db353cb0a 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td @@ -28,7 +28,8 @@ def NeoverseV1Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F, SMEUnsupported.F, - [HasMTE, HasCPA]); + [HasMTE, HasCPA, + HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index 658d7cdd23a6..e7de40fdf1de 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -22,7 +22,8 @@ def NeoverseV2Model : SchedMachineModel { let CompleteModel = 1; list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, - [HasSVE2p1, HasCPA]); + [HasSVE2p1, HasCPA, + HasCSSC]); } //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td index 9e5060f1f364..0ae9a69fd482 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedTSV110.td @@ -27,7 +27,7 @@ def TSV110Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); } // Define each kind of processor resource and number available on the TSV110, diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td 
b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td index e1536f208e44..8df3f56e4573 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX.td @@ -28,7 +28,7 @@ def ThunderXT8XModel : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td index 89faa92155e0..ef4baa3dedff 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX2T99.td @@ -28,7 +28,7 @@ def ThunderX2T99Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td index 8685554b00d7..796bd4b8b5c9 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedThunderX3T110.td @@ -27,7 +27,7 @@ def ThunderX3T110Model : SchedMachineModel { list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F, - [HasMTE]); + [HasMTE, HasCSSC]); // FIXME: Remove when all errors have been fixed. 
let FullInstRWOverlapCheck = 0; } diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index e3a0606331db..dd4c0e2eb642 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -296,6 +296,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { break; case Ampere1: case Ampere1A: + case Ampere1B: CacheLineSize = 64; PrefFunctionAlignment = Align(64); PrefLoopAlignment = Align(64); diff --git a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h index 16864102df59..f8dcbe97b632 100644 --- a/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -42,6 +42,7 @@ public: A64FX, Ampere1, Ampere1A, + Ampere1B, AppleA7, AppleA10, AppleA11, diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 36aab383da68..9d6e8dc573a8 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -150,6 +150,7 @@ class MipsAsmParser : public MCTargetAsmParser { bool IsLittleEndian; bool IsPicEnabled; bool IsCpRestoreSet; + bool CurForbiddenSlotAttr; int CpRestoreOffset; unsigned GPReg; unsigned CpSaveLocation; @@ -552,6 +553,7 @@ public: CurrentFn = nullptr; + CurForbiddenSlotAttr = false; IsPicEnabled = getContext().getObjectFileInfo()->isPositionIndependent(); IsCpRestoreSet = false; @@ -723,6 +725,16 @@ public: return getSTI().hasFeature(Mips::FeatureGINV); } + bool hasForbiddenSlot(const MCInstrDesc &MCID) const { + return !inMicroMipsMode() && (MCID.TSFlags & MipsII::HasForbiddenSlot); + } + + bool SafeInForbiddenSlot(const 
MCInstrDesc &MCID) const { + return !(MCID.TSFlags & MipsII::IsCTI); + } + + void onEndOfFile() override; + /// Warn if RegIndex is the same as the current AT. void warnIfRegIndexIsAT(unsigned RegIndex, SMLoc Loc); @@ -2307,7 +2319,41 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, bool FillDelaySlot = MCID.hasDelaySlot() && AssemblerOptions.back()->isReorder(); - if (FillDelaySlot) + + // Get previous instruction`s forbidden slot attribute and + // whether set reorder. + bool PrevForbiddenSlotAttr = CurForbiddenSlotAttr; + + // Flag represents we set reorder after nop. + bool SetReorderAfterNop = false; + + // If previous instruction has forbidden slot and .set reorder + // is active and current instruction is CTI. + // Then emit a NOP after it. + if (PrevForbiddenSlotAttr && !SafeInForbiddenSlot(MCID)) { + TOut.emitEmptyDelaySlot(false, IDLoc, STI); + // When 'FillDelaySlot' is true, the existing logic will add + // noreorder before instruction and reorder after it. So there + // need exclude this case avoiding two '.set reorder'. + // The format of the first case is: + // .set noreorder + // bnezc + // nop + // .set reorder + if (AssemblerOptions.back()->isReorder() && !FillDelaySlot) { + SetReorderAfterNop = true; + TOut.emitDirectiveSetReorder(); + } + } + + // Save current instruction`s forbidden slot and whether set reorder. + // This is the judgment condition for whether to add nop. + // We would add a couple of '.set noreorder' and '.set reorder' to + // wrap the current instruction and the next instruction. + CurForbiddenSlotAttr = + hasForbiddenSlot(MCID) && AssemblerOptions.back()->isReorder(); + + if (FillDelaySlot || CurForbiddenSlotAttr) TOut.emitDirectiveSetNoReorder(); MacroExpanderResultTy ExpandResult = @@ -2322,6 +2368,17 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, return true; } + // When current instruction was not CTI, recover reorder state. 
+ // The format of the second case is: + // .set noreoder + // bnezc + // add + // .set reorder + if (PrevForbiddenSlotAttr && !SetReorderAfterNop && !FillDelaySlot && + AssemblerOptions.back()->isReorder()) { + TOut.emitDirectiveSetReorder(); + } + // We know we emitted an instruction on the MER_NotAMacro or MER_Success path. // If we're in microMIPS mode then we must also set EF_MIPS_MICROMIPS. if (inMicroMipsMode()) { @@ -2331,6 +2388,14 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, // If this instruction has a delay slot and .set reorder is active, // emit a NOP after it. + // The format of the third case is: + // .set noreorder + // bnezc + // nop + // .set noreorder + // j + // nop + // .set reorder if (FillDelaySlot) { TOut.emitEmptyDelaySlot(hasShortDelaySlot(Inst), IDLoc, STI); TOut.emitDirectiveSetReorder(); @@ -2356,6 +2421,17 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, return false; } +void MipsAsmParser::onEndOfFile() { + MipsTargetStreamer &TOut = getTargetStreamer(); + SMLoc IDLoc = SMLoc(); + // If has pending forbidden slot, fill nop and recover reorder. + if (CurForbiddenSlotAttr) { + TOut.emitEmptyDelaySlot(false, IDLoc, STI); + if (AssemblerOptions.back()->isReorder()) + TOut.emitDirectiveSetReorder(); + } +} + MipsAsmParser::MacroExpanderResultTy MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out, const MCSubtargetInfo *STI) { diff --git a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp index d431d3d91494..88b226eaaccf 100644 --- a/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -4128,14 +4128,18 @@ MipsTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, case 'd': // Address register. Same as 'r' unless generating MIPS16 code. case 'y': // Same as 'r'. Exists for compatibility. 
case 'r': - if (VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8 || VT == MVT::i1) { + if ((VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8 || + VT == MVT::i1) || + (VT == MVT::f32 && Subtarget.useSoftFloat())) { if (Subtarget.inMips16Mode()) return std::make_pair(0U, &Mips::CPU16RegsRegClass); return std::make_pair(0U, &Mips::GPR32RegClass); } - if (VT == MVT::i64 && !Subtarget.isGP64bit()) + if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat())) && + !Subtarget.isGP64bit()) return std::make_pair(0U, &Mips::GPR32RegClass); - if (VT == MVT::i64 && Subtarget.isGP64bit()) + if ((VT == MVT::i64 || (VT == MVT::f64 && Subtarget.useSoftFloat())) && + Subtarget.isGP64bit()) return std::make_pair(0U, &Mips::GPR64RegClass); // This will generate an error message return std::make_pair(0U, nullptr); diff --git a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp index 924df12578fe..5db04a8bef82 100644 --- a/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -1067,7 +1067,8 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, if (!isInt<20>(AM.BaseOffs)) return false; - bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy(); + bool RequireD12 = + Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128)); AddressingMode SupportedAM(!RequireD12, true); if (I != nullptr) SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); @@ -1922,7 +1923,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); } else { - SlotVT = Outs[I].ArgVT; + SlotVT = Outs[I].VT; } SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); diff --git 
a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp index f1197c296553..4466d50458e1 100644 --- a/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp +++ b/contrib/llvm-project/llvm/lib/TargetParser/Host.cpp @@ -321,6 +321,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { return StringSwitch<const char *>(Part) .Case("0xac3", "ampere1") .Case("0xac4", "ampere1a") + .Case("0xac5", "ampere1b") .Default("generic"); } diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp index 1925b91c4da7..c5cb3748a52f 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/FlattenCFG.cpp @@ -407,6 +407,10 @@ bool FlattenCFGOpt::CompareIfRegionBlock(BasicBlock *Block1, BasicBlock *Block2, /// form, by inverting the condition and the branch successors. The same /// approach goes for the opposite case. bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { + // We cannot merge the if-region if the merge point has phi nodes. + if (isa<PHINode>(BB->front())) + return false; + BasicBlock *IfTrue2, *IfFalse2; BranchInst *DomBI2 = GetIfCondition(BB, IfTrue2, IfFalse2); if (!DomBI2) @@ -493,16 +497,6 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) { PBI->replaceUsesOfWith(PBI->getCondition(), NC); Builder.SetInsertPoint(SaveInsertBB, SaveInsertPt); - // Handle PHI node to replace its predecessors to FirstEntryBlock. 
- for (BasicBlock *Succ : successors(PBI)) { - for (PHINode &Phi : Succ->phis()) { - for (unsigned i = 0, e = Phi.getNumIncomingValues(); i != e; ++i) { - if (Phi.getIncomingBlock(i) == SecondEntryBlock) - Phi.setIncomingBlock(i, FirstEntryBlock); - } - } - } - // Remove IfTrue1 if (IfTrue1 != FirstEntryBlock) { IfTrue1->dropAllReferences(); diff --git a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp index 459e3d980592..a1c6bbc52fd0 100644 --- a/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm-project/llvm/lib/Transforms/Utils/Local.cpp @@ -3369,11 +3369,17 @@ void llvm::patchReplacementInstruction(Instruction *I, Value *Repl) { // Patch the replacement so that it is not more restrictive than the value // being replaced. + WithOverflowInst *UnusedWO; + // When replacing the result of a llvm.*.with.overflow intrinsic with a + // overflowing binary operator, nuw/nsw flags may no longer hold. + if (isa<OverflowingBinaryOperator>(ReplInst) && + match(I, m_ExtractValue<0>(m_WithOverflowInst(UnusedWO)))) + ReplInst->dropPoisonGeneratingFlags(); // Note that if 'I' is a load being replaced by some operation, // for example, by an arithmetic operation, then andIRFlags() // would just erase all math flags from the original arithmetic // operation, which is clearly not wanted and not needed. 
- if (!isa<LoadInst>(I)) + else if (!isa<LoadInst>(I)) ReplInst->andIRFlags(I); // FIXME: If both the original and replacement value are part of the diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp index 71edd5fec428..5b85d7d86bfb 100644 --- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageView.cpp @@ -139,7 +139,7 @@ bool SourceCoverageView::shouldRenderRegionMarkers( bool SourceCoverageView::hasSubViews() const { return !ExpansionSubViews.empty() || !InstantiationSubViews.empty() || - !BranchSubViews.empty(); + !BranchSubViews.empty() || !MCDCSubViews.empty(); } std::unique_ptr<SourceCoverageView> diff --git a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp index abc4c49ecae9..b93d8cb03530 100644 --- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp @@ -246,6 +246,9 @@ tr:hover { tr:last-child { border-bottom: none; } +tr:has(> td >a:target) > td.code > pre { + background-color: #ffa; +} )"; const char *EndHeader = "</head>"; @@ -990,15 +993,13 @@ void SourceCoverageViewHTML::renderMCDCView(raw_ostream &OS, MCDCView &MRV, std::string ColNoStr = Twine(DecisionRegion.ColumnStart).str(); std::string TargetName = "L" + LineNoStr; OS << tag("span", - a("#" + TargetName, tag("span", LineNoStr + ":" + ColNoStr), - TargetName), + a("#" + TargetName, tag("span", LineNoStr + ":" + ColNoStr)), "line-number") + ") to ("; LineNoStr = utostr(uint64_t(DecisionRegion.LineEnd)); ColNoStr = utostr(uint64_t(DecisionRegion.ColumnEnd)); OS << tag("span", - a("#" + TargetName, tag("span", LineNoStr + ":" + ColNoStr), - TargetName), + a("#" + TargetName, tag("span", LineNoStr + ":" + ColNoStr)), "line-number") + ")\n\n"; diff --git 
a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp index 73b7ffe16a96..580da45ecfc0 100644 --- a/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-cov/SourceCoverageViewText.cpp @@ -382,7 +382,8 @@ void SourceCoverageViewText::renderMCDCView(raw_ostream &OS, MCDCView &MRV, colored_ostream(OS, raw_ostream::RED, getOptions().Colors && Record.getPercentCovered() < 100.0, /*Bold=*/false, /*BG=*/true) - << format("%0.2f", Record.getPercentCovered()) << "%\n"; + << format("%0.2f", Record.getPercentCovered()) << "%"; + OS << "\n"; renderLinePrefix(OS, ViewDepth); OS << "\n"; } diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp index 59060ac217e3..0d3fea71aafd 100644 --- a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.cpp @@ -14,6 +14,7 @@ #include "ObjDumper.h" #include "llvm-readobj.h" #include "llvm/Object/Archive.h" +#include "llvm/Object/Decompressor.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" @@ -142,8 +143,23 @@ getSectionRefsByNameOrIndex(const object::ObjectFile &Obj, return Ret; } +static void maybeDecompress(const object::ObjectFile &Obj, + StringRef SectionName, StringRef &SectionContent, + SmallString<0> &Out) { + Expected<object::Decompressor> Decompressor = object::Decompressor::create( + SectionName, SectionContent, Obj.isLittleEndian(), Obj.is64Bit()); + if (!Decompressor) + reportWarning(Decompressor.takeError(), Obj.getFileName()); + else if (auto Err = Decompressor->resizeAndDecompress(Out)) + reportWarning(std::move(Err), Obj.getFileName()); + else + SectionContent = Out; +} + void ObjDumper::printSectionsAsString(const object::ObjectFile &Obj, - ArrayRef<std::string> Sections) { + 
ArrayRef<std::string> Sections, + bool Decompress) { + SmallString<0> Out; bool First = true; for (object::SectionRef Section : getSectionRefsByNameOrIndex(Obj, Sections)) { @@ -156,12 +172,16 @@ void ObjDumper::printSectionsAsString(const object::ObjectFile &Obj, StringRef SectionContent = unwrapOrError(Obj.getFileName(), Section.getContents()); + if (Decompress && Section.isCompressed()) + maybeDecompress(Obj, SectionName, SectionContent, Out); printAsStringList(SectionContent); } } void ObjDumper::printSectionsAsHex(const object::ObjectFile &Obj, - ArrayRef<std::string> Sections) { + ArrayRef<std::string> Sections, + bool Decompress) { + SmallString<0> Out; bool First = true; for (object::SectionRef Section : getSectionRefsByNameOrIndex(Obj, Sections)) { @@ -174,6 +194,8 @@ void ObjDumper::printSectionsAsHex(const object::ObjectFile &Obj, StringRef SectionContent = unwrapOrError(Obj.getFileName(), Section.getContents()); + if (Decompress && Section.isCompressed()) + maybeDecompress(Obj, SectionName, SectionContent, Out); const uint8_t *SecContent = SectionContent.bytes_begin(); const uint8_t *SecEnd = SecContent + SectionContent.size(); diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h index 1d679453581b..3958dd3a3333 100644 --- a/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h +++ b/contrib/llvm-project/llvm/tools/llvm-readobj/ObjDumper.h @@ -175,9 +175,9 @@ public: void printAsStringList(StringRef StringContent, size_t StringDataOffset = 0); void printSectionsAsString(const object::ObjectFile &Obj, - ArrayRef<std::string> Sections); + ArrayRef<std::string> Sections, bool Decompress); void printSectionsAsHex(const object::ObjectFile &Obj, - ArrayRef<std::string> Sections); + ArrayRef<std::string> Sections, bool Decompress); std::function<Error(const Twine &Msg)> WarningHandler; void reportUniqueWarning(Error Err) const; diff --git 
a/contrib/llvm-project/llvm/tools/llvm-readobj/Opts.td b/contrib/llvm-project/llvm/tools/llvm-readobj/Opts.td index e2d93c6ec229..018facc278e8 100644 --- a/contrib/llvm-project/llvm/tools/llvm-readobj/Opts.td +++ b/contrib/llvm-project/llvm/tools/llvm-readobj/Opts.td @@ -20,6 +20,7 @@ def all : FF<"all", "Equivalent to setting: --file-header, --program-headers, -- def arch_specific : FF<"arch-specific", "Display architecture-specific information">; def bb_addr_map : FF<"bb-addr-map", "Display the BB address map section">; def cg_profile : FF<"cg-profile", "Display call graph profile section">; +def decompress : FF<"decompress", "Dump decompressed section content when used with -x or -p">; defm demangle : BB<"demangle", "Demangle symbol names", "Do not demangle symbol names (default)">; def dependent_libraries : FF<"dependent-libraries", "Display the dependent libraries section">; def dyn_relocations : FF<"dyn-relocations", "Display the dynamic relocation entries in the file">; @@ -139,3 +140,4 @@ def : F<"u", "Alias for --unwind">, Alias<unwind>; def : F<"X", "Alias for --extra-sym-info">, Alias<extra_sym_info>, Group<grp_elf>; def : F<"V", "Alias for --version-info">, Alias<version_info>, Group<grp_elf>; def : JoinedOrSeparate<["-"], "x">, Alias<hex_dump_EQ>, HelpText<"Alias for --hex-dump">, MetaVarName<"<name or index>">; +def : F<"z", "Alias for --decompress">, Alias<decompress>; diff --git a/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp index f9d605d35244..979433d69011 100644 --- a/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp +++ b/contrib/llvm-project/llvm/tools/llvm-readobj/llvm-readobj.cpp @@ -97,6 +97,7 @@ static bool ArchSpecificInfo; static bool BBAddrMap; bool ExpandRelocs; static bool CGProfile; +static bool Decompress; bool Demangle; static bool DependentLibraries; static bool DynRelocs; @@ -212,6 +213,7 @@ static void parseOptions(const opt::InputArgList 
&Args) { opts::ArchSpecificInfo = Args.hasArg(OPT_arch_specific); opts::BBAddrMap = Args.hasArg(OPT_bb_addr_map); opts::CGProfile = Args.hasArg(OPT_cg_profile); + opts::Decompress = Args.hasArg(OPT_decompress); opts::Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, false); opts::DependentLibraries = Args.hasArg(OPT_dependent_libraries); opts::DynRelocs = Args.hasArg(OPT_dyn_relocations); @@ -439,9 +441,9 @@ static void dumpObject(ObjectFile &Obj, ScopedPrinter &Writer, Dumper->printSymbols(opts::Symbols, opts::DynamicSymbols, opts::ExtraSymInfo, SymComp); if (!opts::StringDump.empty()) - Dumper->printSectionsAsString(Obj, opts::StringDump); + Dumper->printSectionsAsString(Obj, opts::StringDump, opts::Decompress); if (!opts::HexDump.empty()) - Dumper->printSectionsAsHex(Obj, opts::HexDump); + Dumper->printSectionsAsHex(Obj, opts::HexDump, opts::Decompress); if (opts::HashTable) Dumper->printHashTable(); if (opts::GnuHashTable) diff --git a/contrib/llvm-project/llvm/utils/TableGen/Attributes.cpp b/contrib/llvm-project/llvm/utils/TableGen/Attributes.cpp index 474042a3e9a3..db3c4decccb4 100644 --- a/contrib/llvm-project/llvm/utils/TableGen/Attributes.cpp +++ b/contrib/llvm-project/llvm/utils/TableGen/Attributes.cpp @@ -87,7 +87,11 @@ void Attributes::emitFnAttrCompatCheck(raw_ostream &OS, bool IsStringAttr) { for (auto *Rule : CompatRules) { StringRef FuncName = Rule->getValueAsString("CompatFunc"); - OS << " Ret &= " << FuncName << "(Caller, Callee);\n"; + OS << " Ret &= " << FuncName << "(Caller, Callee"; + StringRef AttrName = Rule->getValueAsString("AttrName"); + if (!AttrName.empty()) + OS << ", \"" << AttrName << "\""; + OS << ");\n"; } OS << "\n"; |