aboutsummaryrefslogtreecommitdiff
path: root/contrib/llvm-project/llvm/include/llvm
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm-project/llvm/include/llvm')
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h7
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h14
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h13
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h12
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def18
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h6
-rw-r--r--contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td119
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h11
-rw-r--r--contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td34
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h3
-rw-r--r--contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h6
12 files changed, 194 insertions, 60 deletions
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
index d6f732d35fd4..e8e4f491be5a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -287,6 +287,10 @@ public:
/// store %l, ...
bool MayBeCrossIteration = false;
+ /// Whether alias analysis is allowed to use the dominator tree, for use by
+ /// passes that lazily update the DT while performing AA queries.
+ bool UseDominatorTree = true;
+
AAQueryInfo(AAResults &AAR, CaptureInfo *CI) : AAR(AAR), CI(CI) {}
};
@@ -668,6 +672,9 @@ public:
void enableCrossIterationMode() {
AAQI.MayBeCrossIteration = true;
}
+
+ /// Disable the use of the dominator tree during alias analysis queries.
+ void disableDominatorTree() { AAQI.UseDominatorTree = false; }
};
/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index afc1811239f2..7eca82729430 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -43,20 +43,26 @@ class BasicAAResult : public AAResultBase {
const Function &F;
const TargetLibraryInfo &TLI;
AssumptionCache ∾
- DominatorTree *DT;
+ /// Use getDT() instead of accessing this member directly, in order to
+ /// respect the AAQI.UseDominatorTree option.
+ DominatorTree *DT_;
+
+ DominatorTree *getDT(const AAQueryInfo &AAQI) const {
+ return AAQI.UseDominatorTree ? DT_ : nullptr;
+ }
public:
BasicAAResult(const DataLayout &DL, const Function &F,
const TargetLibraryInfo &TLI, AssumptionCache &AC,
DominatorTree *DT = nullptr)
- : DL(DL), F(F), TLI(TLI), AC(AC), DT(DT) {}
+ : DL(DL), F(F), TLI(TLI), AC(AC), DT_(DT) {}
BasicAAResult(const BasicAAResult &Arg)
: AAResultBase(Arg), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI), AC(Arg.AC),
- DT(Arg.DT) {}
+ DT_(Arg.DT_) {}
BasicAAResult(BasicAAResult &&Arg)
: AAResultBase(std::move(Arg)), DL(Arg.DL), F(Arg.F), TLI(Arg.TLI),
- AC(Arg.AC), DT(Arg.DT) {}
+ AC(Arg.AC), DT_(Arg.DT_) {}
/// Handle invalidation events in the new pass manager.
bool invalidate(Function &Fn, const PreservedAnalyses &PA,
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 6b9d17818201..91e1872e9bd6 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -122,16 +122,23 @@ public:
}
BranchProbabilityInfo(BranchProbabilityInfo &&Arg)
- : Probs(std::move(Arg.Probs)), LastF(Arg.LastF),
- EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {}
+ : Handles(std::move(Arg.Handles)), Probs(std::move(Arg.Probs)),
+ LastF(Arg.LastF),
+ EstimatedBlockWeight(std::move(Arg.EstimatedBlockWeight)) {
+ for (auto &Handle : Handles)
+ Handle.setBPI(this);
+ }
BranchProbabilityInfo(const BranchProbabilityInfo &) = delete;
BranchProbabilityInfo &operator=(const BranchProbabilityInfo &) = delete;
BranchProbabilityInfo &operator=(BranchProbabilityInfo &&RHS) {
releaseMemory();
+ Handles = std::move(RHS.Handles);
Probs = std::move(RHS.Probs);
EstimatedBlockWeight = std::move(RHS.EstimatedBlockWeight);
+ for (auto &Handle : Handles)
+ Handle.setBPI(this);
return *this;
}
@@ -279,6 +286,8 @@ private:
}
public:
+ void setBPI(BranchProbabilityInfo *BPI) { this->BPI = BPI; }
+
BasicBlockCallbackVH(const Value *V, BranchProbabilityInfo *BPI = nullptr)
: CallbackVH(const_cast<Value *>(V)), BPI(BPI) {}
};
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
index 2880ed33a34c..0926093bba99 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/Loads.h
@@ -18,7 +18,7 @@
namespace llvm {
-class AAResults;
+class BatchAAResults;
class AssumptionCache;
class DataLayout;
class DominatorTree;
@@ -129,11 +129,10 @@ extern cl::opt<unsigned> DefMaxInstsToScan;
/// location in memory, as opposed to the value operand of a store.
///
/// \returns The found value, or nullptr if no value is found.
-Value *FindAvailableLoadedValue(LoadInst *Load,
- BasicBlock *ScanBB,
+Value *FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = DefMaxInstsToScan,
- AAResults *AA = nullptr,
+ BatchAAResults *AA = nullptr,
bool *IsLoadCSE = nullptr,
unsigned *NumScanedInst = nullptr);
@@ -141,7 +140,8 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
/// FindAvailableLoadedValue() for the case where we are not interested in
/// finding the closest clobbering instruction if no available load is found.
/// This overload cannot be used to scan across multiple blocks.
-Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
+Value *FindAvailableLoadedValue(LoadInst *Load, BatchAAResults &AA,
+ bool *IsLoadCSE,
unsigned MaxInstsToScan = DefMaxInstsToScan);
/// Scan backwards to see if we have the value of the given pointer available
@@ -170,7 +170,7 @@ Value *FindAvailableLoadedValue(LoadInst *Load, AAResults &AA, bool *IsLoadCSE,
Value *findAvailablePtrLoadStore(const MemoryLocation &Loc, Type *AccessTy,
bool AtLeastAtomic, BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
- unsigned MaxInstsToScan, AAResults *AA,
+ unsigned MaxInstsToScan, BatchAAResults *AA,
bool *IsLoadCSE, unsigned *NumScanedInst);
/// Returns true if a pointer value \p A can be replace with another pointer
diff --git a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
index f09e12f3038c..07edf68c667a 100644
--- a/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/contrib/llvm-project/llvm/include/llvm/Analysis/VecFuncs.def
@@ -771,8 +771,8 @@ TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8")
-TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+TLI_DEFINE_VECFUNC("modf", "_ZGVsNxvl8_modf", SCALABLE(2), NOMASK, "_ZGVsNxvl8")
+TLI_DEFINE_VECFUNC("modff", "_ZGVsNxvl4_modff", SCALABLE(4), NOMASK, "_ZGVsNxvl4")
TLI_DEFINE_VECFUNC("nextafter", "_ZGVsMxvv_nextafter", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("nextafterf", "_ZGVsMxvv_nextafterf", SCALABLE(4), MASKED, "_ZGVsMxvv")
@@ -787,11 +787,11 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
-TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+TLI_DEFINE_VECFUNC("sincos", "_ZGVsNxvl8l8_sincos", SCALABLE(2), NOMASK, "_ZGVsNxvl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVsNxvl4l4_sincosf", SCALABLE(4), NOMASK, "_ZGVsNxvl4l4")
-TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
-TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+TLI_DEFINE_VECFUNC("sincospi", "_ZGVsNxvl8l8_sincospi", SCALABLE(2), NOMASK, "_ZGVsNxvl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "_ZGVsNxvl4l4_sincospif", SCALABLE(4), NOMASK, "_ZGVsNxvl4l4")
TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -1005,8 +1005,6 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED, "
TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8")
TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4")
-TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8")
-TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4")
TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
@@ -1035,13 +1033,9 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_Z
TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
-TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
-TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
-TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
-TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 76bb34d270a2..1d40b1cbb0ea 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -193,11 +193,18 @@ void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs);
void computeAndAddLiveIns(LivePhysRegs &LiveRegs,
MachineBasicBlock &MBB);
-/// Convenience function for recomputing live-in's for \p MBB.
-static inline void recomputeLiveIns(MachineBasicBlock &MBB) {
+/// Convenience function for recomputing live-in's for a MBB. Returns true if
+/// any changes were made.
+static inline bool recomputeLiveIns(MachineBasicBlock &MBB) {
LivePhysRegs LPR;
+ auto oldLiveIns = MBB.getLiveIns();
+
MBB.clearLiveIns();
computeAndAddLiveIns(LPR, MBB);
+ MBB.sortUniqueLiveIns();
+
+ auto newLiveIns = MBB.getLiveIns();
+ return oldLiveIns != newLiveIns;
}
} // end namespace llvm
diff --git a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index c84fd281c6a5..dc2035fa598c 100644
--- a/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/contrib/llvm-project/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -111,6 +111,10 @@ public:
RegisterMaskPair(MCPhysReg PhysReg, LaneBitmask LaneMask)
: PhysReg(PhysReg), LaneMask(LaneMask) {}
+
+ bool operator==(const RegisterMaskPair &other) const {
+ return PhysReg == other.PhysReg && LaneMask == other.LaneMask;
+ }
};
private:
@@ -473,6 +477,8 @@ public:
/// Remove entry from the livein set and return iterator to the next.
livein_iterator removeLiveIn(livein_iterator I);
+ std::vector<RegisterMaskPair> getLiveIns() const { return LiveIns; }
+
class liveout_iterator {
public:
using iterator_category = std::input_iterator_tag;
diff --git a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index e6db9da5526a..c5f43d17d1c1 100644
--- a/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm-project/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2601,6 +2601,11 @@ def int_amdgcn_ds_bvh_stack_rtn :
[ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
+def int_amdgcn_s_wait_event_export_ready :
+ ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
+ Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
+>;
+
// WMMA (Wave Matrix Multiply-Accumulate) intrinsics
//
// These operations perform a matrix multiplication and accumulation of
@@ -2608,10 +2613,10 @@ def int_amdgcn_ds_bvh_stack_rtn :
class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> :
Intrinsic<
- [CD], // %D
+ [CD], // %D
[
AB, // %A
- AB, // %B
+ LLVMMatchType<1>, // %B
LLVMMatchType<0>, // %C
],
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]
@@ -2619,49 +2624,50 @@ class AMDGPUWmmaIntrinsic<LLVMType AB, LLVMType CD> :
class AMDGPUWmmaIntrinsicOPSEL<LLVMType AB, LLVMType CD> :
Intrinsic<
- [CD], // %D
+ [CD], // %D
[
AB, // %A
- AB, // %B
+ LLVMMatchType<1>, // %B
LLVMMatchType<0>, // %C
- llvm_i1_ty, // %high
+ llvm_i1_ty, // %high (op_sel) for GFX11, 0 for GFX12
],
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<3>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
class AMDGPUWmmaIntrinsicIU<LLVMType AB, LLVMType CD> :
Intrinsic<
- [CD], // %D
+ [CD], // %D
[
llvm_i1_ty, // %A_sign
AB, // %A
llvm_i1_ty, // %B_sign
- AB, // %B
+ LLVMMatchType<1>, // %B
LLVMMatchType<0>, // %C
llvm_i1_ty, // %clamp
],
[IntrNoMem, IntrConvergent, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<5>>, IntrWillReturn, IntrNoCallback, IntrNoFree]
>;
-def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_v16f16_ty, llvm_anyfloat_ty>;
-def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_v16i16_ty, llvm_anyfloat_ty>;
-// The regular, untied f16/bf16 wmma intrinsics only write to one half
-// of the registers (set via the op_sel bit).
-// The content of the other 16-bit of the registers is undefined.
-def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>;
-def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>;
-// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix
-// registers to the input accumulator registers.
-// Essentially, the content of the other 16-bit is preserved from the input.
-def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16f16_ty, llvm_anyfloat_ty>;
-def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_v16i16_ty, llvm_anyint_ty>;
-def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_v4i32_ty, llvm_anyint_ty>;
-def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_v2i32_ty, llvm_anyint_ty>;
+// WMMA GFX11Only
-def int_amdgcn_s_wait_event_export_ready :
- ClangBuiltin<"__builtin_amdgcn_s_wait_event_export_ready">,
- Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrWillReturn]
->;
+// The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit.
+// The tied versions of the f16/bf16 wmma intrinsics tie the destination matrix registers to the input accumulator registers.
+// The content of the other 16-bit half is preserved from the input.
+def int_amdgcn_wmma_f16_16x16x16_f16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_bf16_16x16x16_bf16_tied : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty, llvm_anyint_ty>;
+
+// WMMA GFX11Plus
+
+def int_amdgcn_wmma_f32_16x16x16_f16 : AMDGPUWmmaIntrinsic<llvm_anyfloat_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_bf16 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_i32_16x16x16_iu8 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_wmma_i32_16x16x16_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>;
+
+// GFX11: The OPSEL intrinsics read from and write to one half of the registers, selected by the op_sel bit.
+// The content of the other 16-bit half is undefined.
+// GFX12: The op_sel bit must be 0.
+def int_amdgcn_wmma_f16_16x16x16_f16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyfloat_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_bf16_16x16x16_bf16 : AMDGPUWmmaIntrinsicOPSEL<llvm_anyint_ty, llvm_anyint_ty>;
//===----------------------------------------------------------------------===//
// GFX12 Intrinsics
@@ -2681,6 +2687,65 @@ def int_amdgcn_permlanex16_var : ClangBuiltin<"__builtin_amdgcn_permlanex16_var"
[IntrNoMem, IntrConvergent, IntrWillReturn,
ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree]>;
+
+// WMMA (Wave Matrix Multiply-Accumulate) intrinsics
+//
+// These operations perform a matrix multiplication and accumulation of
+// the form: D = A * B + C .
+
+// A and B are <8 x fp8> or <8 x bf8>, but since fp8 and bf8 are not supported by llvm we use <2 x i32>.
+def int_amdgcn_wmma_f32_16x16x16_fp8_fp8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_fp8_bf8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_bf8_fp8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+def int_amdgcn_wmma_f32_16x16x16_bf8_bf8 : AMDGPUWmmaIntrinsic<llvm_anyint_ty, llvm_anyfloat_ty>;
+// A and B are <16 x iu4>.
+def int_amdgcn_wmma_i32_16x16x32_iu4 : AMDGPUWmmaIntrinsicIU<llvm_anyint_ty, llvm_anyint_ty>;
+
+// SWMMAC (Wave Matrix(sparse) Multiply-Accumulate) intrinsics
+//
+// These operations perform a sparse matrix multiplication and accumulation of
+// the form: D = A * B + C.
+// A is sparse matrix, half the size of B, and is expanded using sparsity index.
+
+class AMDGPUSWmmacIntrinsicIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
+ Intrinsic<
+ [CD], // %D
+ [
+ A, // %A
+ B, // %B
+ LLVMMatchType<0>, // %C
+ Index // %Sparsity index for A
+ ],
+ [IntrNoMem, IntrConvergent, IntrWillReturn]
+>;
+
+class AMDGPUSWmmacIntrinsicIUIdx<LLVMType A, LLVMType B, LLVMType CD, LLVMType Index> :
+ Intrinsic<
+ [CD], // %D
+ [
+ llvm_i1_ty, // %A_sign
+ A, // %A
+ llvm_i1_ty, // %B_sign
+ B, // %B
+ LLVMMatchType<0>, // %C
+ Index, // %Sparsity index for A
+ llvm_i1_ty, // %clamp
+ ],
+ [IntrNoMem, IntrConvergent, IntrWillReturn, ImmArg<ArgIndex<0>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<6>>]
+>;
+
+def int_amdgcn_swmmac_f32_16x16x32_f16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_bf16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f16_16x16x32_f16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_bf16_16x16x32_bf16 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_i32_16x16x32_iu8 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_i32_16x16x32_iu4 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_i32_16x16x64_iu4 : AMDGPUSWmmacIntrinsicIUIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_fp8_fp8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_fp8_bf8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_bf8_fp8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+def int_amdgcn_swmmac_f32_16x16x32_bf8_bf8 : AMDGPUSWmmacIntrinsicIdx<llvm_anyint_ty, llvm_anyint_ty, llvm_anyfloat_ty, llvm_anyint_ty>;
+
def int_amdgcn_global_atomic_ordered_add_b64 : AMDGPUAtomicRtn<llvm_i64_ty, global_ptr_ty>;
def int_amdgcn_flat_atomic_fmin_num : AMDGPUAtomicRtn<llvm_anyfloat_ty>;
@@ -2712,6 +2777,10 @@ class AMDGPULoadTr<LLVMType ptr_ty>:
def int_amdgcn_global_load_tr : AMDGPULoadTr<global_ptr_ty>;
+// i32 @llvm.amdgcn.wave.id()
+def int_amdgcn_wave_id :
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// Deep learning intrinsics.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h b/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h
index ed244febc38d..77d32cc7fb37 100644
--- a/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h
+++ b/contrib/llvm-project/llvm/include/llvm/Support/X86FoldTablesUtils.h
@@ -46,11 +46,12 @@ enum {
// Broadcast type.
// (stored in bits 12 - 14)
TB_BCAST_TYPE_SHIFT = TB_ALIGN_SHIFT + 3,
- TB_BCAST_D = 0 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_Q = 1 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_SS = 2 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_SD = 3 << TB_BCAST_TYPE_SHIFT,
- TB_BCAST_SH = 4 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_W = 0 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_D = 1 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_Q = 2 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SS = 3 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SD = 4 << TB_BCAST_TYPE_SHIFT,
+ TB_BCAST_SH = 5 << TB_BCAST_TYPE_SHIFT,
TB_BCAST_MASK = 0x7 << TB_BCAST_TYPE_SHIFT,
// Unused bits 15-16
diff --git a/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td b/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
index 82c4c7b23a49..b5419cb9f386 100644
--- a/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
+++ b/contrib/llvm-project/llvm/include/llvm/Target/TargetInstrPredicate.td
@@ -152,6 +152,34 @@ class CheckImmOperand_s<int Index, string Value> : CheckOperandBase<Index> {
string ImmVal = Value;
}
+// Check that the operand at position `Index` is less than `Imm`.
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperandLT<int Index, int Imm> : CheckOperandBase<Index> {
+ int ImmVal = Imm;
+}
+
+// Check that the operand at position `Index` is greater than `Imm`.
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperandGT<int Index, int Imm> : CheckOperandBase<Index> {
+ int ImmVal = Imm;
+}
+
+// Check that the operand at position `Index` is less than or equal to `Imm`.
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperandLE<int Index, int Imm> : CheckNot<CheckImmOperandGT<Index, Imm>>;
+
+// Check that the operand at position `Index` is greater than or equal to `Imm`.
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against `Imm`.
+class CheckImmOperandGE<int Index, int Imm> : CheckNot<CheckImmOperandLT<Index, Imm>>;
+
// Expands to a call to `FunctionMapper` if field `FunctionMapper` is set.
// Otherwise, it expands to a CheckNot<CheckInvalidRegOperand<Index>>.
class CheckRegOperandSimple<int Index> : CheckOperandBase<Index>;
@@ -203,6 +231,12 @@ class CheckAll<list<MCInstPredicate> Sequence>
class CheckAny<list<MCInstPredicate> Sequence>
: CheckPredicateSequence<Sequence>;
+// Check that the operand at position `Index` is in range [Start, End].
+// If field `FunctionMapper` is a non-empty string, then function
+// `FunctionMapper` is applied to the operand value, and the return value is then
+// compared against range [Start, End].
+class CheckImmOperandRange<int Index, int Start, int End>
+ : CheckAll<[CheckImmOperandGE<Index, Start>, CheckImmOperandLE<Index, End>]>;
// Used to expand the body of a function predicate. See the definition of
// TIIPredicate below.
diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h
index 623fdc21ba65..6d82748d8004 100644
--- a/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h
+++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/AArch64TargetParser.h
@@ -813,7 +813,8 @@ struct CpuAlias {
StringRef Name;
};
-inline constexpr CpuAlias CpuAliases[] = {{"grace", "neoverse-v2"}};
+inline constexpr CpuAlias CpuAliases[] = {{"cobalt-100", "neoverse-n2"},
+ {"grace", "neoverse-v2"}};
bool getExtensionFeatures(
const AArch64::ExtensionBitset &Extensions,
diff --git a/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h b/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h
index 870dc75b1c1f..49ec8de9c528 100644
--- a/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h
+++ b/contrib/llvm-project/llvm/include/llvm/TargetParser/Triple.h
@@ -1033,11 +1033,11 @@ public:
isWindowsCygwinEnvironment() || isOHOSFamily();
}
- /// Tests whether the target uses TLS Descriptor by default.
+ /// True if the target supports both general-dynamic and TLSDESC, and TLSDESC
+ /// is enabled by default.
bool hasDefaultTLSDESC() const {
// TODO: Improve check for other platforms, like Android, and RISC-V
- // Note: This is currently only used on RISC-V.
- return isOSBinFormatELF() && isAArch64();
+ return false;
}
/// Tests whether the target uses -data-sections as default.