diff options
Diffstat (limited to 'contrib/llvm-project/llvm/include/llvm')
12 files changed, 194 insertions, 60 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h index d6f732d35fd4..e8e4f491be5a 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -287,6 +287,10 @@ public: /// store %l, ... bool MayBeCrossIteration = false; + /// Whether alias analysis is allowed to use the dominator tree, for use by + /// passes that lazily update the DT while performing AA queries. + bool UseDominatorTree = true; + AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {} }; @@ -668,6 +672,9 @@ public: void enableCrossIterationMode() { AAQI.MayBeCrossIteration = true; } + + /// Disable the use of the dominator tree during alias analysis queries. + void disableDominatorTree() { AAQI.UseDominatorTree = false; } }; /// Temporary typedef for legacy code that uses a generic \c AliasAnalysis diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h index afc1811239f2..7eca82729430 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h @@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase { const Function &F; const TargetLibraryInfo &TLI; AssumptionCache &AC; - DominatorTree *DT; + /// Use getDT() instead of accessing this member directly, in order to + /// respect the AAQI.UseDominatorTree option. + DominatorTree *DT_; + + DominatorTree *getDT(const AAQueryInfo &AAQI) const { + return AAQI.UseDominatorTree ? DT_ : nullptr; + } public: BasicAAResult(const DataLayout &DL, const Function &F, const TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree *DT = nullptr) - : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {} + : DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {} BasicAAResult(const BasicAAResult &Arg) : AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), - DT(Arg.DT) {} + DT_(Arg.DT_) {} BasicAAResult(BasicAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), - AC(Arg.AC), DT(Arg.DT) {} + AC(Arg.AC), DT_(Arg.DT_) {} /// Handle invalidation events in the new pass manager. bool invalidate(Function &Fn, const PreservedAnalyses &PA, diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h index 6b9d17818201..91e1872e9bd6 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h @@ -122,16 +122,23 @@ public: } BranchProbabilityInfo(BranchProbabilityInfo &&Arg) - : Probs(std::move(Arg.Probs)), LastF(Arg.LastF), - EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {} + : Handles(std::move(Arg.Handles)), Probs(std::move(Arg.Probs)), + LastF(Arg.LastF), + EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) { + for (auto &Handle : Handles) + Handle.setBPI(this); + } BranchProbabilityInfo(const BranchProbabilityInfo &) = delete; BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete; BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) { releaseMemory(); + Handles = std::move(RHS.Handles); Probs = std::move(RHS.Probs); EstimatedBlockWeight = std::move(RHS.EstimatedBlockWeight); + for (auto &Handle : Handles) + Handle.setBPI(this); return *this; } @@ -279,6 +286,8 @@ private: } public: + void setBPI(BranchProbabilityInfo *BPI) { this->BPI = BPI; } + BasicBlockCallbackVH(const Value *V, BranchProbabilityInfo *BPI = nullptr) : CallbackVH(const_cast<Value *>(V)), BPI(BPI) {} }; diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h index 2880ed33a34c..0926093bba99 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h @@ -18,7 +18,7 @@ namespace llvm { -class AAResults; +class BatchAAResults; class AssumptionCache; class DataLayout; class DominatorTree; @@ -129,11 +129,10 @@ extern cl::opt<unsigned> DefMaxInstsToScan; /// location in memory, as opposed to the value operand of a store. /// /// \returns The found value, or nullptr if no value is found. -Value *FindAvailableLoadedValue(LoadInst *Load, - BasicBlock *ScanBB, +Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan = DefMaxInstsToScan, - AAResults *AA = nullptr, + BatchAAResults *AA = nullptr, bool *IsLoadCSE = nullptr, unsigned *NumScanedInst = nullptr); @@ -141,7 +140,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load, /// FindAvailableLoadedValue() for the case where we are not interested in /// finding the closest clobbering instruction if no available load is found. /// This overload cannot be used to scan across multiple blocks. -Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, +Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA, + bool *IsLoadCSE, unsigned MaxInstsToScan = DefMaxInstsToScan); /// Scan backwards to see if we have the value of the given pointer available @@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE, Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy, bool AtLeastAtomic, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, - unsigned MaxInstsToScan, AAResults *AA, + unsigned MaxInstsToScan, BatchAAResults *AA, bool *IsLoadCSE, unsigned *NumScanedInst); /// Returns true if a pointer value \p A can be replace with another pointer diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def index f09e12f3038c..07edf68c667a 100644 --- a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def +++ b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def @@ -771,8 +771,8 @@ TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8") -TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4") +TLI_DEFINE_VECFUNC("modf", "_ZGVsNxvl8_modf", SCALABLE(2), NOMASK, "_ZGVsNxvl8") +TLI_DEFINE_VECFUNC("modff", "_ZGVsNxvl4_modff", SCALABLE(4), NOMASK, "_ZGVsNxvl4") TLI_DEFINE_VECFUNC("nextafter", "_ZGVsMxvv_nextafter", SCALABLE(2), MASKED, "_ZGVsMxvv") TLI_DEFINE_VECFUNC("nextafterf", "_ZGVsMxvv_nextafterf", SCALABLE(4), MASKED, "_ZGVsMxvv") @@ -787,11 +787,11 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv") -TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") -TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") +TLI_DEFINE_VECFUNC("sincos", "_ZGVsNxvl8l8_sincos", SCALABLE(2), NOMASK, "_ZGVsNxvl8l8") +TLI_DEFINE_VECFUNC("sincosf", "_ZGVsNxvl4l4_sincosf", SCALABLE(4), NOMASK, "_ZGVsNxvl4l4") -TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") -TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") +TLI_DEFINE_VECFUNC("sincospi", "_ZGVsNxvl8l8_sincospi", SCALABLE(2), NOMASK, "_ZGVsNxvl8l8") +TLI_DEFINE_VECFUNC("sincospif", "_ZGVsNxvl4l4_sincospif", SCALABLE(4), NOMASK, "_ZGVsNxvl4l4") TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv") TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv") @@ -1005,8 +1005,6 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED, " TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8") TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4") -TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8") -TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4") TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv") TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv") @@ -1035,13 +1033,9 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_Z TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8") TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") -TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") -TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8") TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4") -TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8") -TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4") TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v") TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v") diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h index 76bb34d270a2..1d40b1cbb0ea 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h @@ -193,11 +193,18 @@ void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs); void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB); -/// Convenience function for recomputing live-in's for \p MBB. -static inline void recomputeLiveIns(MachineBasicBlock &MBB) { +/// Convenience function for recomputing live-in's for a MBB. Returns true if +/// any changes were made. +static inline bool recomputeLiveIns(MachineBasicBlock &MBB) { LivePhysRegs LPR; + auto oldLiveIns = MBB.getLiveIns(); + MBB.clearLiveIns(); computeAndAddLiveIns(LPR, MBB); + MBB.sortUniqueLiveIns(); + + auto newLiveIns = MBB.getLiveIns(); + return oldLiveIns != newLiveIns; } } // end namespace llvm diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h index c84fd281c6a5..dc2035fa598c 100644 --- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -111,6 +111,10 @@ public: RegisterMaskPair(MCPhysReg PhysReg, LaneBitmask LaneMask) : PhysReg(PhysReg), LaneMask(LaneMask) {} + + bool operator==(const RegisterMaskPair &other) const { + return PhysReg == other.PhysReg && LaneMask == other.LaneMask; + } }; private: @@ -473,6 +477,8 @@ public: /// Remove entry from the livein set and return iterator to the next. livein_iterator removeLiveIn(livein_iterator I); + std::vector<RegisterMaskPair> getLiveIns() const { return LiveIns; } + class liveout_iterator { public: using iterator_category = std::input_iterator_tag; diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index e6db9da5526a..c5f43d17d1c1 100644 --- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2601,6 +2601,11 @@ def int_amdgcn_ds_bvh_stack_rtn : [ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; +def int_amdgcn_s_wait_event_export_ready : + ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">, + Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn] +>; + // WMMA (Wave Matrix Multiply-Accumulate) intrinsics // // These operations perform a matrix multiplication and accumulation of @@ -2608,10 +2613,10 @@ def int_amdgcn_ds_bvh_stack_rtn : class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ AB, // %A - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C ], [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree] @@ -2619,49 +2624,50 @@ class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> : class AMDGPUWmmaIntrinsicOPSEL<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ AB, // %A - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C - llvm_i1_ty, // %high + llvm_i1_ty, // %high (op_sel) for GFX11, 0 for GFX12 ], [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; class AMDGPUWmmaIntrinsicIU<LLVMType AB, LLVMType CD> : Intrinsic< - [CD], // %D + [CD], // %D [ llvm_i1_ty, // %A_sign AB, // %A llvm_i1_ty, // %B_sign - AB, // %B + LLVMMatchType<1>, // %B LLVMMatchType<0>, // %C llvm_i1_ty, // %clamp ], [IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree] >; -def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_v16i16_ty, llvm_anyfloat_ty>; -// The regular, untied f16/bf16 wmma intrinsics only write to one half -// of the registers (set via the op_sel bit). -// The content of the other 16-bit of the registers is undefined. -def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>; -// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix -// registers to the input accumulator registers. -// Essentially, the content of the other 16-bit is preserved from the input. -def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>; -def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>; -def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_v4i32_ty, llvm_anyint_ty>; -def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_v2i32_ty, llvm_anyint_ty>; +// WMMA GFX11Only -def int_amdgcn_s_wait_event_export_ready : - ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">, - Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn] ->; +// The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit. +// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix registers to the input accumulator registers. +// The content of the other 16-bit half is preserved from the input. +def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty, llvm_anyint_ty>; + +// WMMA GFX11Plus + +def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>; + +// GFX11: The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit. +// The content of the other 16-bit half is undefined. +// GFX12: The op_sel bit must be 0. +def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty, llvm_anyint_ty>; //===----------------------------------------------------------------------===// // GFX12 Intrinsics @@ -2681,6 +2687,65 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var" [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>; + +// WMMA (Wave Matrix Multiply-Accumulate) intrinsics +// +// These operations perform a matrix multiplication and accumulation of +// the form: D = A * B + C . + +// A and B are <8 x fp8> or <8 x bf8>, but since fp8 and bf8 are not supported by llvm we use <2 x i32>. +def int_amdgcn_wmma_f32_16x16x16_fp8_fp8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_fp8_bf8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_bf8_fp8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +def int_amdgcn_wmma_f32_16x16x16_bf8_bf8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>; +// A and B are <16 x iu4>. +def int_amdgcn_wmma_i32_16x16x32_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>; + +// SWMMAC (Wave Matrix(sparse) Multiply-Accumulate) intrinsics +// +// These operations perform a sparse matrix multiplication and accumulation of +// the form: D = A * B + C. +// A is sparse matrix, half the size of B, and is expanded using sparsity index. + +class AMDGPUSWmmacIntrinsicIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> : + Intrinsic< + [CD], // %D + [ + A, // %A + B, // %B + LLVMMatchType<0>, // %C + Index // %Sparsity index for A + ], + [IntrNoMem, IntrConvergent, IntrWillReturn] +>; + +class AMDGPUSWmmacIntrinsicIUIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> : + Intrinsic< + [CD], // %D + [ + llvm_i1_ty, // %A_sign + A, // %A + llvm_i1_ty, // %B_sign + B, // %B + LLVMMatchType<0>, // %C + Index, // %Sparsity index for A + llvm_i1_ty, // %clamp + ], + [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>] +>; + +def int_amdgcn_swmmac_f32_16x16x32_f16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_bf16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f16_16x16x32_f16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_bf16_16x16x32_bf16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_i32_16x16x32_iu8 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_i32_16x16x32_iu4 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_i32_16x16x64_iu4 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_fp8_fp8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_fp8_bf8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_bf8_fp8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; +def int_amdgcn_swmmac_f32_16x16x32_bf8_bf8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>; + def int_amdgcn_global_atomic_ordered_add_b64 : AMDGPUAtomicRtn<llvm_i64_ty, global_ptr_ty>; def int_amdgcn_flat_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>; @@ -2712,6 +2777,10 @@ class AMDGPULoadTr<LLVMType ptr_ty>: def int_amdgcn_global_load_tr : AMDGPULoadTr<global_ptr_ty>; +// i32 @llvm.amdgcn.wave.id() +def int_amdgcn_wave_id : + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Deep learning intrinsics. //===----------------------------------------------------------------------===// diff --git a/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h b/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h index ed244febc38d..77d32cc7fb37 100644 --- a/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h +++ b/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h @@ -46,11 +46,12 @@ enum { // Broadcast type. // (stored in bits 12 - 14) TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3, - TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT, - TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_W = 0 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_D = 1 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_Q = 2 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SS = 3 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SD = 4 << TB_BCAST_TYPE_SHIFT, + TB_BCAST_SH = 5 << TB_BCAST_TYPE_SHIFT, TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT, // Unused bits 15-16 diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td index 82c4c7b23a49..b5419cb9f386 100644 --- a/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td +++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td @@ -152,6 +152,34 @@ class CheckImmOperand_s<int Index, string Value> : CheckOperandBase<Index> { string ImmVal = Value; } +// Check that the operand at position `Index` is less than `Imm`. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against `Imm`. +class CheckImmOperandLT<int Index, int Imm> : CheckOperandBase<Index> { + int ImmVal = Imm; +} + +// Check that the operand at position `Index` is greater than `Imm`. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against `Imm`. +class CheckImmOperandGT<int Index, int Imm> : CheckOperandBase<Index> { + int ImmVal = Imm; +} + +// Check that the operand at position `Index` is less than or equal to `Imm`. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against `Imm`. +class CheckImmOperandLE<int Index, int Imm> : CheckNot<CheckImmOperandGT<Index, Imm>>; + +// Check that the operand at position `Index` is greater than or equal to `Imm`. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against `Imm`. +class CheckImmOperandGE<int Index, int Imm> : CheckNot<CheckImmOperandLT<Index, Imm>>; + // Expands to a call to `FunctionMapper` if field `FunctionMapper` is set. // Otherwise, it expands to a CheckNot<CheckInvalidRegOperand<Index>>. class CheckRegOperandSimple<int Index> : CheckOperandBase<Index>; @@ -203,6 +231,12 @@ class CheckAll<list<MCInstPredicate> Sequence> class CheckAny<list<MCInstPredicate> Sequence> : CheckPredicateSequence<Sequence>; +// Check that the operand at position `Index` is in range [Start, End]. +// If field `FunctionMapper` is a non-empty string, then function +// `FunctionMapper` is applied to the operand value, and the return value is then +// compared against range [Start, End]. +class CheckImmOperandRange<int Index, int Start, int End> + : CheckAll<[CheckImmOperandGE<Index, Start>, CheckImmOperandLE<Index, End>]>; // Used to expand the body of a function predicate. See the definition of // TIIPredicate below. diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 623fdc21ba65..6d82748d8004 100644 --- a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -813,7 +813,8 @@ struct CpuAlias { StringRef Name; }; -inline constexpr CpuAlias CpuAliases[] = {{"grace", "neoverse-v2"}}; +inline constexpr CpuAlias CpuAliases[] = {{"cobalt-100", "neoverse-n2"}, + {"grace", "neoverse-v2"}}; bool getExtensionFeatures( const AArch64::ExtensionBitset &Extensions, diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h index 870dc75b1c1f..49ec8de9c528 100644 --- a/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h +++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h @@ -1033,11 +1033,11 @@ public: isWindowsCygwinEnvironment() || isOHOSFamily(); } - /// Tests whether the target uses TLS Descriptor by default. + /// True if the target supports both general-dynamic and TLSDESC, and TLSDESC + /// is enabled by default. bool hasDefaultTLSDESC() const { // TODO: Improve check for other platforms, like Android, and RISC-V - // Note: This is currently only used on RISC-V. - return isOSBinFormatELF() && isAArch64(); + return false; } /// Tests whether the target uses -data-sections as default. |