author    Dimitry Andric <dim@FreeBSD.org>  2024-01-11 18:24:21 +0000
committer Dimitry Andric <dim@FreeBSD.org>  2024-01-11 18:24:21 +0000
commit    950076cd18f3fa9d789b4add9d405898efff09a5 (patch)
tree      2454649366290c6292cc2d94dde042f71bc1e144 /llvm
parent    aca2e42c67292825f835f094eb0c4df5ce6013db (diff)
Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/ADT/STLExtras.h | 29
-rw-r--r--  llvm/include/llvm/ADT/StringRef.h | 4
-rw-r--r--  llvm/include/llvm/Analysis/VecFuncs.def | 190
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def | 1
-rw-r--r--  llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def | 3
-rw-r--r--  llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h | 29
-rw-r--r--  llvm/include/llvm/CodeGen/CodeGenPassBuilder.h | 1
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h | 3
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h | 18
-rw-r--r--  llvm/include/llvm/CodeGen/GlobalISel/Localizer.h | 7
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGISel.h | 16
-rw-r--r--  llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 7
-rw-r--r--  llvm/include/llvm/DWARFLinker/Utils.h (renamed from llvm/lib/DWARFLinker/Parallel/Utils.h) | 36
-rw-r--r--  llvm/include/llvm/Frontend/OpenACC/ACC.td | 66
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsAArch64.td | 6
-rw-r--r--  llvm/include/llvm/IR/IntrinsicsNVVM.td | 10
-rw-r--r--  llvm/include/llvm/IR/ModuleSummaryIndex.h | 6
-rw-r--r--  llvm/include/llvm/IR/PatternMatch.h | 36
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProf.h | 3
-rw-r--r--  llvm/include/llvm/ProfileData/InstrProfData.inc | 2
-rw-r--r--  llvm/include/llvm/Support/RISCVISAInfo.h | 22
-rw-r--r--  llvm/include/llvm/Support/TargetOpcodes.def | 3
-rw-r--r--  llvm/include/llvm/Target/GenericOpcodes.td | 21
-rw-r--r--  llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 3
-rw-r--r--  llvm/include/llvm/Target/TargetPfmCounters.td | 21
-rw-r--r--  llvm/include/llvm/TargetParser/ARMTargetParserCommon.h | 1
-rw-r--r--  llvm/lib/Analysis/LazyValueInfo.cpp | 8
-rw-r--r--  llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 17
-rw-r--r--  llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp | 34
-rw-r--r--  llvm/lib/CodeGen/BranchFolding.cpp | 17
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 84
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 10
-rw-r--r--  llvm/lib/CodeGen/GlobalISel/Localizer.cpp | 55
-rw-r--r--  llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp | 9
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 74
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 12
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 48
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 5
-rw-r--r--  llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 11
-rw-r--r--  llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp | 15
-rw-r--r--  llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp | 2
-rw-r--r--  llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp | 23
-rw-r--r--  llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp | 2
-rw-r--r--  llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h | 38
-rw-r--r--  llvm/lib/DWARFLinker/Utils.cpp | 2
-rw-r--r--  llvm/lib/IR/Verifier.cpp | 11
-rw-r--r--  llvm/lib/MC/MCParser/ELFAsmParser.cpp | 6
-rw-r--r--  llvm/lib/MC/MCSectionELF.cpp | 18
-rw-r--r--  llvm/lib/Passes/PassBuilder.cpp | 1
-rw-r--r--  llvm/lib/Passes/PassRegistry.def | 4
-rw-r--r--  llvm/lib/Support/RISCVISAInfo.cpp | 301
-rw-r--r--  llvm/lib/TableGen/Record.cpp | 15
-rw-r--r--  llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 5
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 4
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp | 64
-rw-r--r--  llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp | 2
-rw-r--r--  llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 18
-rw-r--r--  llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 98
-rw-r--r--  llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 3
-rw-r--r--  llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp | 8
-rw-r--r--  llvm/lib/Target/AArch64/SMEInstrFormats.td | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUCombine.td | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 12
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 52
-rw-r--r--  llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp | 46
-rw-r--r--  llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 43
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 17
-rw-r--r--  llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2
-rw-r--r--  llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 1
-rw-r--r--  llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 8
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 33
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td | 33
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP1Instructions.td | 55
-rw-r--r--  llvm/lib/Target/AMDGPU/VOP2Instructions.td | 14
-rw-r--r--  llvm/lib/Target/AMDGPU/VOPInstructions.td | 13
-rw-r--r--  llvm/lib/Target/ARM/ARMISelLowering.cpp | 9
-rw-r--r--  llvm/lib/Target/MSP430/MSP430ISelLowering.cpp | 13
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 6
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 3
-rw-r--r--  llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 13
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2
-rw-r--r--  llvm/lib/Target/PowerPC/PPCInstrInfo.td | 2
-rw-r--r--  llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp | 77
-rw-r--r--  llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp | 6
-rw-r--r--  llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 40
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp | 2
-rw-r--r--  llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 7
-rw-r--r--  llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp | 12
-rw-r--r--  llvm/lib/Target/RISCV/RISCVFeatures.td | 1
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 46
-rw-r--r--  llvm/lib/Target/RISCV/RISCVISelLowering.h | 6
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 37
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 17
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 140
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoD.td | 16
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td | 33
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 38
-rw-r--r--  llvm/lib/Target/RISCV/RISCVInstrInfoZa.td | 98
-rw-r--r--  llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 45
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 2
-rw-r--r--  llvm/lib/Target/SystemZ/SystemZISelLowering.cpp | 7
-rw-r--r--  llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp | 4
-rw-r--r--  llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 16
-rw-r--r--  llvm/lib/Target/X86/X86FlagsCopyLowering.cpp | 1
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp | 53
-rw-r--r--  llvm/lib/Target/X86/X86InstrArithmetic.td | 106
-rw-r--r--  llvm/lib/Target/X86/X86InstrCompiler.td | 364
-rw-r--r--  llvm/lib/Target/X86/X86InstrMisc.td | 12
-rw-r--r--  llvm/lib/Target/X86/X86PfmCounters.td | 18
-rw-r--r--  llvm/lib/TargetParser/ARMTargetParserCommon.cpp | 7
-rw-r--r--  llvm/lib/TextAPI/InterfaceFile.cpp | 8
-rw-r--r--  llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp | 521
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp | 24
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 114
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 4
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 4
-rw-r--r--  llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 2
-rw-r--r--  llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp | 12
-rw-r--r--  llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 79
-rw-r--r--  llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 23
-rw-r--r--  llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp | 58
-rw-r--r--  llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp | 55
-rw-r--r--  llvm/lib/Transforms/Utils/Local.cpp | 3
-rw-r--r--  llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 13
-rw-r--r--  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6
-rw-r--r--  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 69
-rw-r--r--  llvm/utils/TableGen/CodeGenDAGPatterns.cpp | 2
-rw-r--r--  llvm/utils/TableGen/DAGISelMatcherEmitter.cpp | 81
-rw-r--r--  llvm/utils/TableGen/ExegesisEmitter.cpp | 52
-rw-r--r--  llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp | 2
135 files changed, 2899 insertions, 1418 deletions
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index 18bc4d108b15..a136eeb0ff1b 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -1290,18 +1290,6 @@ public:
return (*this)[size() - 1];
}
- /// Compare this range with another.
- template <typename OtherT>
- friend bool operator==(const indexed_accessor_range_base &lhs,
- const OtherT &rhs) {
- return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
- }
- template <typename OtherT>
- friend bool operator!=(const indexed_accessor_range_base &lhs,
- const OtherT &rhs) {
- return !(lhs == rhs);
- }
-
/// Return the size of this range.
size_t size() const { return count; }
@@ -1364,6 +1352,23 @@ protected:
/// The size from the owning range.
ptrdiff_t count;
};
+/// Compare this range with another.
+/// FIXME: Make me a member function instead of friend when it works in C++20.
+template <typename OtherT, typename DerivedT, typename BaseT, typename T,
+ typename PointerT, typename ReferenceT>
+bool operator==(const indexed_accessor_range_base<DerivedT, BaseT, T, PointerT,
+ ReferenceT> &lhs,
+ const OtherT &rhs) {
+ return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
+}
+
+template <typename OtherT, typename DerivedT, typename BaseT, typename T,
+ typename PointerT, typename ReferenceT>
+bool operator!=(const indexed_accessor_range_base<DerivedT, BaseT, T, PointerT,
+ ReferenceT> &lhs,
+ const OtherT &rhs) {
+ return !(lhs == rhs);
+}
} // end namespace detail
/// This class provides an implementation of a range of
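The two hoisted operators above are the same element-wise comparisons that previously lived in the class as hidden friends; spelling every template parameter out at namespace scope keeps overload resolution working under C++20's rewritten-candidate rules, hence the FIXME about moving them back once that works. A self-contained sketch of the resulting pattern, with SimpleRange standing in for indexed_accessor_range_base (illustrative only, not the patched code):

```cpp
#include <algorithm>
#include <vector>

// Toy analogue of indexed_accessor_range_base: comparison operators live at
// namespace scope rather than as in-class friend templates.
template <typename T> struct SimpleRange {
  const T *Begin, *End;
  const T *begin() const { return Begin; }
  const T *end() const { return End; }
};

template <typename OtherT, typename T>
bool operator==(const SimpleRange<T> &lhs, const OtherT &rhs) {
  return std::equal(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
}

template <typename OtherT, typename T>
bool operator!=(const SimpleRange<T> &lhs, const OtherT &rhs) {
  return !(lhs == rhs);
}

int main() {
  int Data[] = {1, 2, 3};
  SimpleRange<int> R{Data, Data + 3};
  std::vector<int> V{1, 2, 3};
  return R == V ? 0 : 1; // compares element-wise against any container
}
```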
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index d892333de391..1c6c96678b5d 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -128,7 +128,7 @@ namespace llvm {
/// data - Get a pointer to the start of the string (which may not be null
/// terminated).
- [[nodiscard]] const char *data() const { return Data; }
+ [[nodiscard]] constexpr const char *data() const { return Data; }
/// empty - Check if the string is empty.
[[nodiscard]] constexpr bool empty() const { return Length == 0; }
@@ -245,7 +245,7 @@ namespace llvm {
/// @name Type Conversions
/// @{
- operator std::string_view() const {
+ constexpr operator std::string_view() const {
return std::string_view(data(), size());
}
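With data() and the std::string_view conversion now constexpr, a StringRef can be consumed inside constant expressions. A minimal sketch of what that unlocks, using a stand-in type (StringRefLike is illustrative, carrying the same constexpr qualifiers the hunk adds):

```cpp
#include <cstddef>
#include <string_view>

// Stand-in with the constexpr qualifiers the patch gives StringRef.
struct StringRefLike {
  const char *Data = nullptr;
  std::size_t Length = 0;
  constexpr const char *data() const { return Data; }
  constexpr std::size_t size() const { return Length; }
  constexpr operator std::string_view() const {
    return std::string_view(Data, Length);
  }
};

// The conversion can now run at compile time.
constexpr std::string_view SV = StringRefLike{"abc", 3};
static_assert(SV.size() == 3 && SV[0] == 'a');
```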
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index ee9207bb4f7d..b22bdd555cd4 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -470,123 +470,125 @@ TLI_DEFINE_VECFUNC("__exp2f_finite", "__svml_exp2f16", FIXED(16), "_ZGV_LLVM_N16
#elif defined(TLI_DEFINE_SLEEFGNUABI_VF2_VECFUNCS)
-TLI_DEFINE_VECFUNC( "acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("acos", "_ZGVnN2v_acos", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("asin", "_ZGVnN2v_asin", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("atan", "_ZGVnN2v_atan", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("atan2", "_ZGVnN2vv_atan2", FIXED(2), "_ZGV_LLVM_N2vv")
-TLI_DEFINE_VECFUNC( "atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("atanh", "_ZGVnN2v_atanh", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("cos", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGVnN2v_cos", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("cosh", "_ZGVnN2v_cosh", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("exp", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVnN2v_exp", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
+
+TLI_DEFINE_VECFUNC("exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("fmod", "_ZGVnN2vv_fmod", FIXED(2), "_ZGV_LLVM_N2vv")
-TLI_DEFINE_VECFUNC( "lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("log", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVnN2v_log", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("log2", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8")
+TLI_DEFINE_VECFUNC("modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8")
-TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
-TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
-TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8")
+TLI_DEFINE_VECFUNC("sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8")
-TLI_DEFINE_VECFUNC( "sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8")
+TLI_DEFINE_VECFUNC("sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8")
-TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("tan", "_ZGVnN2v_tan", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("tanh", "_ZGVnN2v_tanh", FIXED(2), "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC( "tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("tgamma", "_ZGVnN2v_tgamma", FIXED(2), "_ZGV_LLVM_N2v")
#elif defined(TLI_DEFINE_SLEEFGNUABI_VF4_VECFUNCS)
-TLI_DEFINE_VECFUNC( "acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("acosf", "_ZGVnN4v_acosf", FIXED(4), "_ZGV_LLVM_N4v")
+
+TLI_DEFINE_VECFUNC("asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "asinf", "_ZGVnN4v_asinf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "atanf", "_ZGVnN4v_atanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
-TLI_DEFINE_VECFUNC( "atan2f", "_ZGVnN4vv_atan2f", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "atanhf", "_ZGVnN4v_atanhf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "cosf", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "llvm.cos.f32", "_ZGVnN4v_cosf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "coshf", "_ZGVnN4v_coshf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "expf", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "llvm.exp.f32", "_ZGVnN4v_expf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("fmodf", "_ZGVnN4vv_fmodf", FIXED(4), "_ZGV_LLVM_N4vv")
-TLI_DEFINE_VECFUNC( "lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("logf", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVnN4v_logf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("log2f", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4")
+TLI_DEFINE_VECFUNC("modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4")
-TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
-TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
-TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4l4")
TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4")
-TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tanf", "_ZGVnN4v_tanf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tanhf", "_ZGVnN4v_tanhf", FIXED(4), "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC( "tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("tgammaf", "_ZGVnN4v_tgammaf", FIXED(4), "_ZGV_LLVM_N4v")
#elif defined(TLI_DEFINE_SLEEFGNUABI_SCALABLE_VECFUNCS)
@@ -618,16 +620,16 @@ TLI_DEFINE_VECFUNC("expf", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGVsMxv_exp", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGVsMxv_expf", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv")
-
TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("exp2", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("exp2f", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "_ZGVsMxv_exp2", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv")
@@ -639,16 +641,16 @@ TLI_DEFINE_VECFUNC("logf", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVsMxv_log", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVsMxv_logf", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC( "log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC( "log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
-
TLI_DEFINE_VECFUNC("log10", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("log2", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("log2f", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "_ZGVsMxv_log2", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "_ZGVsMxv_log2f", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8")
TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4")
@@ -765,16 +767,6 @@ TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_vexpq_f32", FIXED(4), NOMASK, "_ZGV_LL
TLI_DEFINE_VECFUNC("llvm.exp.f64", "armpl_svexp_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp.f32", "armpl_svexp_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
-
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
-
TLI_DEFINE_VECFUNC("exp10", "armpl_vexp10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -785,6 +777,16 @@ TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZG
TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("exp2", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("exp2f", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("exp2", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("exp2f", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_vexp2q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_vexp2q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "armpl_svexp2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "armpl_svexp2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -830,6 +832,16 @@ TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_vlogq_f32", FIXED(4), NOMASK, "_ZGV_LL
TLI_DEFINE_VECFUNC("llvm.log.f64", "armpl_svlog_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log.f32", "armpl_svlog_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("log1p", "armpl_vlog1pq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("log1pf", "armpl_vlog1pq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("log1p", "armpl_svlog1p_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
@@ -845,16 +857,6 @@ TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_vlog2q_f32", FIXED(4), NOMASK, "_ZGV_
TLI_DEFINE_VECFUNC("llvm.log2.f64", "armpl_svlog2_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log2.f32", "armpl_svlog2_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("log10", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC("log10f", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC("log10", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("log10f", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
-
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_vlog10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
-TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
-TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
-
TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8")
TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4")
TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8")
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
index 30375de420e3..5fb3fa4aeb7b 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def
@@ -59,6 +59,7 @@ ELF_RELOC(R_AARCH64_ADR_GOT_PAGE, 0x137)
ELF_RELOC(R_AARCH64_LD64_GOT_LO12_NC, 0x138)
ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139)
ELF_RELOC(R_AARCH64_PLT32, 0x13a)
+ELF_RELOC(R_AARCH64_GOTPCREL32, 0x13b)
ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200)
ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201)
ELF_RELOC(R_AARCH64_TLSGD_ADD_LO12_NC, 0x202)
diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
index c7fd6490041c..b478799c91fb 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/RISCV.def
@@ -40,8 +40,7 @@ ELF_RELOC(R_RISCV_SUB8, 37)
ELF_RELOC(R_RISCV_SUB16, 38)
ELF_RELOC(R_RISCV_SUB32, 39)
ELF_RELOC(R_RISCV_SUB64, 40)
-ELF_RELOC(R_RISCV_GNU_VTINHERIT, 41)
-ELF_RELOC(R_RISCV_GNU_VTENTRY, 42)
+ELF_RELOC(R_RISCV_GOT32_PCREL, 41)
ELF_RELOC(R_RISCV_ALIGN, 43)
ELF_RELOC(R_RISCV_RVC_BRANCH, 44)
ELF_RELOC(R_RISCV_RVC_JUMP, 45)
diff --git a/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h b/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h
index b740ab567b12..fb0ecd828b68 100644
--- a/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h
+++ b/llvm/include/llvm/CodeGen/AssignmentTrackingAnalysis.h
@@ -1,13 +1,21 @@
+//===-- llvm/CodeGen/AssignmentTrackingAnalysis.h --------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
#ifndef LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H
#define LLVM_CODEGEN_ASSIGNMENTTRACKINGANALYSIS_H
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
namespace llvm {
-class Function;
class Instruction;
class raw_ostream;
} // namespace llvm
@@ -94,6 +102,25 @@ public:
///@}
};
+class DebugAssignmentTrackingAnalysis
+ : public AnalysisInfoMixin<DebugAssignmentTrackingAnalysis> {
+ friend AnalysisInfoMixin<DebugAssignmentTrackingAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ using Result = FunctionVarLocs;
+ Result run(Function &F, FunctionAnalysisManager &FAM);
+};
+
+class DebugAssignmentTrackingPrinterPass
+ : public PassInfoMixin<DebugAssignmentTrackingPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ DebugAssignmentTrackingPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+
class AssignmentTrackingAnalysis : public FunctionPass {
std::unique_ptr<FunctionVarLocs> Results;
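DebugAssignmentTrackingAnalysis is the new-pass-manager counterpart of the legacy AssignmentTrackingAnalysis FunctionPass, so its FunctionVarLocs result is obtained through a FunctionAnalysisManager. A hedged sketch of a consumer (HypotheticalPass is illustrative; actual registration happens in PassRegistry.def, which this commit also touches):

```cpp
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Illustrative new-PM consumer of the analysis declared above.
struct HypotheticalPass : PassInfoMixin<HypotheticalPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    // Computed on first request, cached afterwards; Result = FunctionVarLocs.
    FunctionVarLocs &VarLocs =
        FAM.getResult<DebugAssignmentTrackingAnalysis>(F);
    (void)VarLocs; // variable-location info would be consumed here
    return PreservedAnalyses::all();
  }
};
```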
diff --git a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
index fa81ff504ac6..f540f3774c41 100644
--- a/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/CodeGen/CodeGenPassBuilder.h
@@ -23,6 +23,7 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/CodeGenPrepare.h"
#include "llvm/CodeGen/DwarfEHPrepare.h"
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index dcc1a4580b14..a6e9406bed06 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -910,6 +910,9 @@ private:
bool tryFoldSelectOfConstants(GSelect *Select, BuildFnTy &MatchInfo);
+ /// Try to fold (icmp X, Y) ? X : Y -> integer minmax.
+ bool tryFoldSelectToIntMinMax(GSelect *Select, BuildFnTy &MatchInfo);
+
bool isOneOrOneSplat(Register Src, bool AllowUndefs);
bool isZeroOrZeroSplat(Register Src, bool AllowUndefs);
bool isConstantSplatVector(Register Src, int64_t SplatValue,
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 6ab1d4550c51..14885d5f9d08 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -558,6 +558,24 @@ public:
}
};
+/// Represents a G_PHI.
+class GPhi : public GenericMachineInstr {
+public:
+ /// Returns the number of incoming values.
+ unsigned getNumIncomingValues() const { return (getNumOperands() - 1) / 2; }
+ /// Returns the I'th incoming vreg.
+ Register getIncomingValue(unsigned I) {
+ return getOperand(I * 2 + 1).getReg();
+ }
+ /// Returns the I'th incoming basic block.
+ MachineBasicBlock *getIncomingBlock(unsigned I) {
+ return getOperand(I * 2 + 2).getMBB();
+ }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_PHI;
+ }
+};
} // namespace llvm
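The GPhi wrapper encodes G_PHI's operand layout (a def followed by value/block pairs) in one place, instead of every caller re-deriving the `I * 2 + 1` arithmetic. A short sketch of the intended call-site shape (visitPhi is illustrative):

```cpp
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
using namespace llvm;

// Illustrative traversal of a G_PHI via the new wrapper; classof makes
// dyn_cast<GPhi> check the opcode for us.
static void visitPhi(MachineInstr &MI) {
  if (auto *Phi = dyn_cast<GPhi>(&MI)) {
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
      Register Reg = Phi->getIncomingValue(I);            // operand 2*I + 1
      MachineBasicBlock *Pred = Phi->getIncomingBlock(I); // operand 2*I + 2
      (void)Reg;
      (void)Pred;
    }
  }
}
```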
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
index b1fcdd207a60..4fbff4d10f8a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h
@@ -67,10 +67,9 @@ private:
typedef SmallSetVector<MachineInstr *, 32> LocalizedSetVecT;
- /// If \p Op is a phi operand and not unique in that phi, that is,
- /// there are other operands in the phi with the same register,
- /// return true.
- bool isNonUniquePhiValue(MachineOperand &Op) const;
+ /// If \p Op is a reg operand of a PHI, return the number of total
+ /// operands in the PHI that are the same as \p Op, including itself.
+ unsigned getNumPhiUses(MachineOperand &Op) const;
/// Do inter-block localization from the entry block.
bool localizeInterBlock(MachineFunction &MF,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index 40046e0a8dec..e4d90f6e898f 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -159,7 +159,15 @@ public:
OPC_CheckChild2Same,
OPC_CheckChild3Same,
OPC_CheckPatternPredicate,
+ OPC_CheckPatternPredicate0,
+ OPC_CheckPatternPredicate1,
OPC_CheckPatternPredicate2,
+ OPC_CheckPatternPredicate3,
+ OPC_CheckPatternPredicate4,
+ OPC_CheckPatternPredicate5,
+ OPC_CheckPatternPredicate6,
+ OPC_CheckPatternPredicate7,
+ OPC_CheckPatternPredicateTwoByte,
OPC_CheckPredicate,
OPC_CheckPredicateWithOperands,
OPC_CheckOpcode,
@@ -207,6 +215,14 @@ public:
OPC_CheckChild2CondCode,
OPC_CheckValueType,
OPC_CheckComplexPat,
+ OPC_CheckComplexPat0,
+ OPC_CheckComplexPat1,
+ OPC_CheckComplexPat2,
+ OPC_CheckComplexPat3,
+ OPC_CheckComplexPat4,
+ OPC_CheckComplexPat5,
+ OPC_CheckComplexPat6,
+ OPC_CheckComplexPat7,
OPC_CheckAndImm,
OPC_CheckOrImm,
OPC_CheckImmAllOnesV,
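The numbered variants let the matcher-table emitter fold small predicate and complex-pattern indices straight into the opcode byte, saving one table byte per check (the TwoByte form covers indices that no longer fit in a single byte). A sketch of the decode arithmetic this implies (enumerator values are illustrative, not the real table encoding):

```cpp
#include <cstdint>

enum : uint8_t {
  OPC_CheckPatternPredicate = 40, // generic form: index byte follows
  OPC_CheckPatternPredicate0,     // index 0 folded into the opcode
  OPC_CheckPatternPredicate1,
  // ... through OPC_CheckPatternPredicate7
};

// Recover the predicate number, consuming an operand byte only for the
// generic form.
static unsigned decodePredNo(const uint8_t *&Table, uint8_t Opcode) {
  if (Opcode == OPC_CheckPatternPredicate)
    return *Table++;
  return Opcode - OPC_CheckPatternPredicate0;
}
```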
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index ebf410cc94de..65b06d0f4579 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -935,6 +935,9 @@ public:
/// Helper method returns the APInt of a ConstantSDNode operand.
inline const APInt &getConstantOperandAPInt(unsigned Num) const;
+ /// Helper method returns the APInt value of a ConstantSDNode.
+ inline const APInt &getAsAPIntVal() const;
+
const SDValue &getOperand(unsigned Num) const {
assert(Num < NumOperands && "Invalid child # of SDNode!");
return OperandList[Num];
@@ -1656,6 +1659,10 @@ const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
}
+const APInt &SDNode::getAsAPIntVal() const {
+ return cast<ConstantSDNode>(this)->getAPIntValue();
+}
+
class ConstantFPSDNode : public SDNode {
friend class SelectionDAG;
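getAsAPIntVal() wraps the common "cast to ConstantSDNode, then fetch the APInt" idiom, so call sites that already know a node is constant get shorter. A minimal sketch (isPowerOfTwoConstant is illustrative):

```cpp
#include "llvm/CodeGen/SelectionDAGNodes.h"
using namespace llvm;

static bool isPowerOfTwoConstant(const SDNode *N) {
  if (!isa<ConstantSDNode>(N))
    return false;
  // Previously: cast<ConstantSDNode>(N)->getAPIntValue().isPowerOf2()
  return N->getAsAPIntVal().isPowerOf2();
}
```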
diff --git a/llvm/lib/DWARFLinker/Parallel/Utils.h b/llvm/include/llvm/DWARFLinker/Utils.h
index 3c05b2ea173d..23e59c967011 100644
--- a/llvm/lib/DWARFLinker/Parallel/Utils.h
+++ b/llvm/include/llvm/DWARFLinker/Utils.h
@@ -6,14 +6,17 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H
-#define LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H
+#ifndef LLVM_DWARFLINKER_UTILS_H
+#define LLVM_DWARFLINKER_UTILS_H
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
namespace llvm {
namespace dwarf_linker {
-namespace parallel {
/// This function calls \p Iteration() until it returns false.
/// If number of iterations exceeds \p MaxCounter then an Error is returned.
@@ -27,16 +30,35 @@ inline Error finiteLoop(function_ref<Expected<bool>()> Iteration,
Expected<bool> IterationResultOrError = Iteration();
if (!IterationResultOrError)
return IterationResultOrError.takeError();
-
if (!IterationResultOrError.get())
return Error::success();
}
-
return createStringError(std::errc::invalid_argument, "Infinite recursion");
}
-} // end of namespace parallel
+/// Make a best effort to guess the
+/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path.
+inline SmallString<128> guessToolchainBaseDir(StringRef SysRoot) {
+ SmallString<128> Result;
+ // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
+ StringRef Base = sys::path::parent_path(SysRoot);
+ if (sys::path::filename(Base) != "SDKs")
+ return Result;
+ Base = sys::path::parent_path(Base);
+ Result = Base;
+ Result += "/Toolchains";
+ return Result;
+}
+
+inline bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
+ // Debug info can contain paths from any OS, not necessarily
+ // an OS we're currently running on. Moreover different compilation units can
+ // be compiled on different operating systems and linked together later.
+ return sys::path::is_absolute(Path, sys::path::Style::posix) ||
+ sys::path::is_absolute(Path, sys::path::Style::windows);
+}
+
} // end of namespace dwarf_linker
} // end of namespace llvm
-#endif // LLVM_LIB_DWARFLINKER_PARALLEL_UTILS_H
+#endif // LLVM_DWARFLINKER_UTILS_H
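Moving Utils.h out of lib/DWARFLinker/Parallel/ into the public include tree, and dropping the parallel namespace, lets the classic linker share these helpers alongside the parallel one. A hedged usage sketch of finiteLoop (this assumes the MaxCounter parameter keeps a default bound in the header, which the hunk does not show):

```cpp
#include "llvm/DWARFLinker/Utils.h"
using namespace llvm;
using namespace llvm::dwarf_linker;

// Iterate until the callback reports completion (false) or fails; the
// bounded loop guards against reference cycles in malformed DWARF.
static Error walkChain(unsigned Steps) {
  unsigned I = 0;
  return finiteLoop([&]() -> Expected<bool> {
    ++I;
    return I < Steps; // true = keep going
  });
}
```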
diff --git a/llvm/include/llvm/Frontend/OpenACC/ACC.td b/llvm/include/llvm/Frontend/OpenACC/ACC.td
index 013d18e160de..0dbd934d83f0 100644
--- a/llvm/include/llvm/Frontend/OpenACC/ACC.td
+++ b/llvm/include/llvm/Frontend/OpenACC/ACC.td
@@ -391,9 +391,7 @@ def ACC_Loop : Directive<"loop"> {
let allowedClauses = [
VersionedClause<ACCC_DeviceType>,
VersionedClause<ACCC_Private>,
- VersionedClause<ACCC_Reduction>
- ];
- let allowedOnceClauses = [
+ VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Collapse>,
VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_Tile>,
@@ -421,15 +419,17 @@ def ACC_Init : Directive<"init"> {
// 2.15.1
def ACC_Routine : Directive<"routine"> {
- let allowedOnceClauses = [
+ let allowedClauses = [
VersionedClause<ACCC_Bind>,
VersionedClause<ACCC_DeviceType>,
- VersionedClause<ACCC_NoHost>,
VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_Seq>,
VersionedClause<ACCC_Vector>,
VersionedClause<ACCC_Worker>
];
+ let allowedOnceClauses = [
+ VersionedClause<ACCC_NoHost>
+ ];
}
// 2.14.3
@@ -532,32 +532,32 @@ def ACC_HostData : Directive<"host_data"> {
// 2.11
def ACC_KernelsLoop : Directive<"kernels loop"> {
let allowedClauses = [
+ VersionedClause<ACCC_Attach>,
+ VersionedClause<ACCC_Collapse>,
VersionedClause<ACCC_Copy>,
VersionedClause<ACCC_Copyin>,
VersionedClause<ACCC_Copyout>,
VersionedClause<ACCC_Create>,
+ VersionedClause<ACCC_DevicePtr>,
VersionedClause<ACCC_DeviceType>,
+ VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_NoCreate>,
+ VersionedClause<ACCC_NumGangs>,
+ VersionedClause<ACCC_NumWorkers>,
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
VersionedClause<ACCC_Reduction>,
- VersionedClause<ACCC_DevicePtr>,
- VersionedClause<ACCC_Attach>,
- VersionedClause<ACCC_Wait>
+ VersionedClause<ACCC_Tile>,
+ VersionedClause<ACCC_Vector>,
+ VersionedClause<ACCC_VectorLength>,
+ VersionedClause<ACCC_Wait>,
+ VersionedClause<ACCC_Worker>
];
let allowedOnceClauses = [
VersionedClause<ACCC_Async>,
- VersionedClause<ACCC_Collapse>,
VersionedClause<ACCC_Default>,
- VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_If>,
- VersionedClause<ACCC_NumGangs>,
- VersionedClause<ACCC_NumWorkers>,
- VersionedClause<ACCC_Self>,
- VersionedClause<ACCC_Tile>,
- VersionedClause<ACCC_Vector>,
- VersionedClause<ACCC_VectorLength>,
- VersionedClause<ACCC_Worker>
+ VersionedClause<ACCC_Self>
];
let allowedExclusiveClauses = [
VersionedClause<ACCC_Auto>,
@@ -570,6 +570,7 @@ def ACC_KernelsLoop : Directive<"kernels loop"> {
def ACC_ParallelLoop : Directive<"parallel loop"> {
let allowedClauses = [
VersionedClause<ACCC_Attach>,
+ VersionedClause<ACCC_Collapse>,
VersionedClause<ACCC_Copy>,
VersionedClause<ACCC_Copyin>,
VersionedClause<ACCC_Copyout>,
@@ -577,25 +578,24 @@ def ACC_ParallelLoop : Directive<"parallel loop"> {
VersionedClause<ACCC_DevicePtr>,
VersionedClause<ACCC_DeviceType>,
VersionedClause<ACCC_FirstPrivate>,
+ VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_NoCreate>,
+ VersionedClause<ACCC_NumGangs>,
+ VersionedClause<ACCC_NumWorkers>,
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
VersionedClause<ACCC_Reduction>,
VersionedClause<ACCC_Tile>,
- VersionedClause<ACCC_Wait>
+ VersionedClause<ACCC_Vector>,
+ VersionedClause<ACCC_VectorLength>,
+ VersionedClause<ACCC_Wait>,
+ VersionedClause<ACCC_Worker>
];
let allowedOnceClauses = [
VersionedClause<ACCC_Async>,
- VersionedClause<ACCC_Collapse>,
VersionedClause<ACCC_Default>,
- VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_If>,
- VersionedClause<ACCC_NumGangs>,
- VersionedClause<ACCC_NumWorkers>,
- VersionedClause<ACCC_Self>,
- VersionedClause<ACCC_Vector>,
- VersionedClause<ACCC_VectorLength>,
- VersionedClause<ACCC_Worker>
+ VersionedClause<ACCC_Self>
];
let allowedExclusiveClauses = [
VersionedClause<ACCC_Auto>,
@@ -608,6 +608,7 @@ def ACC_ParallelLoop : Directive<"parallel loop"> {
def ACC_SerialLoop : Directive<"serial loop"> {
let allowedClauses = [
VersionedClause<ACCC_Attach>,
+ VersionedClause<ACCC_Collapse>,
VersionedClause<ACCC_Copy>,
VersionedClause<ACCC_Copyin>,
VersionedClause<ACCC_Copyout>,
@@ -615,22 +616,21 @@ def ACC_SerialLoop : Directive<"serial loop"> {
VersionedClause<ACCC_DevicePtr>,
VersionedClause<ACCC_DeviceType>,
VersionedClause<ACCC_FirstPrivate>,
+ VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_NoCreate>,
VersionedClause<ACCC_Present>,
VersionedClause<ACCC_Private>,
VersionedClause<ACCC_Reduction>,
- VersionedClause<ACCC_Wait>
+ VersionedClause<ACCC_Tile>,
+ VersionedClause<ACCC_Vector>,
+ VersionedClause<ACCC_Wait>,
+ VersionedClause<ACCC_Worker>
];
let allowedOnceClauses = [
VersionedClause<ACCC_Async>,
- VersionedClause<ACCC_Collapse>,
VersionedClause<ACCC_Default>,
- VersionedClause<ACCC_Gang>,
VersionedClause<ACCC_If>,
- VersionedClause<ACCC_Self>,
- VersionedClause<ACCC_Tile>,
- VersionedClause<ACCC_Vector>,
- VersionedClause<ACCC_Worker>
+ VersionedClause<ACCC_Self>
];
let allowedExclusiveClauses = [
VersionedClause<ACCC_Auto>,
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 9088168b4c67..acff5c20b1b9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -2708,8 +2708,8 @@ class SVE2p1_Single_Store_Quadword
: DefaultAttrsIntrinsic<[],
[llvm_anyvector_ty, llvm_nxv1i1_ty, llvm_ptr_ty],
[IntrWriteMem, IntrArgMemOnly]>;
-def int_aarch64_sve_st1uwq : SVE2p1_Single_Store_Quadword;
-def int_aarch64_sve_st1udq : SVE2p1_Single_Store_Quadword;
+def int_aarch64_sve_st1wq : SVE2p1_Single_Store_Quadword;
+def int_aarch64_sve_st1dq : SVE2p1_Single_Store_Quadword;
def int_aarch64_sve_ld2q_sret : AdvSIMD_2Vec_PredLoad_Intrinsic;
@@ -3617,7 +3617,7 @@ def int_aarch64_sve_tbxq : AdvSIMD_SVE2_TBX_Intrinsic;
// SVE2.1 - Extract vector segment from each pair of quadword segments.
//
-def int_aarch64_sve_extq_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_extq : AdvSIMD_2VectorArgIndexed_Intrinsic;
//
// SVE2.1 - Move predicate to/from vector
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 6fd8e80013ce..cf50f2a59f60 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -4710,4 +4710,14 @@ def int_nvvm_is_explicit_cluster
[IntrNoMem, IntrSpeculatable, NoUndef<RetIndex>],
"llvm.nvvm.is_explicit_cluster">;
+// Setmaxnreg inc/dec intrinsics
+def int_nvvm_setmaxnreg_inc_sync_aligned_u32
+ : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
+ "llvm.nvvm.setmaxnreg.inc.sync.aligned.u32">;
+def int_nvvm_setmaxnreg_dec_sync_aligned_u32
+ : DefaultAttrsIntrinsic<[], [llvm_i32_ty],
+ [IntrConvergent, IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>],
+ "llvm.nvvm.setmaxnreg.dec.sync.aligned.u32">;
+
} // let TargetPrefix = "nvvm"
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index e72f74ad4adb..66c7d10d823d 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -1011,6 +1011,12 @@ public:
return *Callsites;
}
+ void addCallsite(CallsiteInfo &Callsite) {
+ if (!Callsites)
+ Callsites = std::make_unique<CallsitesTy>();
+ Callsites->push_back(Callsite);
+ }
+
ArrayRef<AllocInfo> allocs() const {
if (Allocs)
return *Allocs;
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 447ac0f2aa61..90d99a6031c8 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -1495,6 +1495,36 @@ struct ThreeOps_match {
}
};
+/// Matches instructions with Opcode and any number of operands
+template <unsigned Opcode, typename... OperandTypes> struct AnyOps_match {
+ std::tuple<OperandTypes...> Operands;
+
+ AnyOps_match(const OperandTypes &...Ops) : Operands(Ops...) {}
+
+ // Operand matching works by recursively calling match_operands, matching the
+ // operands left to right. The first version is called for each operand but
+ // the last, for which the second version is called. The second version of
+ // match_operands is also used to match each individual operand.
+ template <int Idx, int Last>
+ std::enable_if_t<Idx != Last, bool> match_operands(const Instruction *I) {
+ return match_operands<Idx, Idx>(I) && match_operands<Idx + 1, Last>(I);
+ }
+
+ template <int Idx, int Last>
+ std::enable_if_t<Idx == Last, bool> match_operands(const Instruction *I) {
+ return std::get<Idx>(Operands).match(I->getOperand(Idx));
+ }
+
+ template <typename OpTy> bool match(OpTy *V) {
+ if (V->getValueID() == Value::InstructionVal + Opcode) {
+ auto *I = cast<Instruction>(V);
+ return I->getNumOperands() == sizeof...(OperandTypes) &&
+ match_operands<0, sizeof...(OperandTypes) - 1>(I);
+ }
+ return false;
+ }
+};
+
/// Matches SelectInst.
template <typename Cond, typename LHS, typename RHS>
inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
@@ -1611,6 +1641,12 @@ m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
PointerOp);
}
+/// Matches GetElementPtrInst.
+template <typename... OperandTypes>
+inline auto m_GEP(const OperandTypes &...Ops) {
+ return AnyOps_match<Instruction::GetElementPtr, OperandTypes...>(Ops...);
+}
+
//===----------------------------------------------------------------------===//
// Matchers for CastInst classes
//
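AnyOps_match recurses over the operand pack left to right and insists on an exact operand count, so the new m_GEP matcher only fires when the GEP has precisely as many operands as patterns supplied. A sketch of a call site (isGEPWithConstIdx is illustrative):

```cpp
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Matches only two-operand GEPs: a base pointer plus one constant index.
static bool isGEPWithConstIdx(Value *V) {
  Value *Base;
  ConstantInt *Idx;
  return match(V, m_GEP(m_Value(Base), m_ConstantInt(Idx)));
}
```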
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 36be2e7d869e..87e7bbbd727e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -1035,7 +1035,8 @@ const HashT HashType = HashT::MD5;
inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); }
// This structure defines the file header of the LLVM profile
-// data file in indexed-format.
+// data file in indexed-format. Please update llvm/docs/InstrProfileFormat.rst
+// as appropriate when updating the indexed profile format.
struct Header {
uint64_t Magic;
uint64_t Version;
diff --git a/llvm/include/llvm/ProfileData/InstrProfData.inc b/llvm/include/llvm/ProfileData/InstrProfData.inc
index f5de23ff4b94..25df899b3f36 100644
--- a/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -123,6 +123,8 @@ INSTR_PROF_VALUE_NODE(PtrToNodeT, llvm::PointerType::getUnqual(Ctx), Next, \
/* INSTR_PROF_RAW_HEADER start */
/* Definition of member fields of the raw profile header data structure. */
+/* Please update llvm/docs/InstrProfileFormat.rst as appropriate when updating
+ raw profile format. */
#ifndef INSTR_PROF_RAW_HEADER
#define INSTR_PROF_RAW_HEADER(Type, Name, Initializer)
#else
diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h
index c539448683d3..46df93d75226 100644
--- a/llvm/include/llvm/Support/RISCVISAInfo.h
+++ b/llvm/include/llvm/Support/RISCVISAInfo.h
@@ -18,11 +18,6 @@
#include <vector>
namespace llvm {
-struct RISCVExtensionInfo {
- unsigned MajorVersion;
- unsigned MinorVersion;
-};
-
void riscvExtensionsHelp(StringMap<StringRef> DescMap);
class RISCVISAInfo {
@@ -30,6 +25,12 @@ public:
RISCVISAInfo(const RISCVISAInfo &) = delete;
RISCVISAInfo &operator=(const RISCVISAInfo &) = delete;
+ /// Represents the major and version number components of a RISC-V extension.
+ struct ExtensionVersion {
+ unsigned Major;
+ unsigned Minor;
+ };
+
static bool compareExtension(const std::string &LHS, const std::string &RHS);
/// Helper class for OrderedExtensionMap.
@@ -41,7 +42,7 @@ public:
/// OrderedExtensionMap is std::map, it's specialized to keep entries
/// in canonical order of extension.
- typedef std::map<std::string, RISCVExtensionInfo, ExtensionComparator>
+ typedef std::map<std::string, ExtensionVersion, ExtensionComparator>
OrderedExtensionMap;
RISCVISAInfo(unsigned XLen, OrderedExtensionMap &Exts)
@@ -71,10 +72,10 @@ public:
std::vector<std::string> toFeatures(bool AddAllExtensions = false,
bool IgnoreUnknown = true) const;
- const OrderedExtensionMap &getExtensions() const { return Exts; };
+ const OrderedExtensionMap &getExtensions() const { return Exts; }
- unsigned getXLen() const { return XLen; };
- unsigned getFLen() const { return FLen; };
+ unsigned getXLen() const { return XLen; }
+ unsigned getFLen() const { return FLen; }
unsigned getMinVLen() const { return MinVLen; }
unsigned getMaxVLen() const { return 65536; }
unsigned getMaxELen() const { return MaxELen; }
@@ -104,8 +105,7 @@ private:
OrderedExtensionMap Exts;
- void addExtension(StringRef ExtName, unsigned MajorVersion,
- unsigned MinorVersion);
+ void addExtension(StringRef ExtName, ExtensionVersion Version);
Error checkDependency();
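Folding the free-standing RISCVExtensionInfo into the class as RISCVISAInfo::ExtensionVersion (with the terser Major/Minor fields) also threads the struct through addExtension instead of two loose unsigneds. A sketch of iterating the renamed type (printExtensions is illustrative):

```cpp
#include "llvm/Support/RISCVISAInfo.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static void printExtensions(const RISCVISAInfo &Info) {
  for (const auto &KV : Info.getExtensions()) {
    const RISCVISAInfo::ExtensionVersion &Ver = KV.second;
    outs() << KV.first << " " << Ver.Major << "." << Ver.Minor << "\n";
  }
}
```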
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 3824b1c66951..c005218c80f4 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -687,6 +687,9 @@ HANDLE_TARGET_OPCODE(G_FMINIMUM)
HANDLE_TARGET_OPCODE(G_FMAXIMUM)
/// Access to FP environment.
+HANDLE_TARGET_OPCODE(G_GET_FPENV)
+HANDLE_TARGET_OPCODE(G_SET_FPENV)
+HANDLE_TARGET_OPCODE(G_RESET_FPENV)
HANDLE_TARGET_OPCODE(G_GET_FPMODE)
HANDLE_TARGET_OPCODE(G_SET_FPMODE)
HANDLE_TARGET_OPCODE(G_RESET_FPMODE)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 73e38b15bf67..2c73b67f9e1a 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1020,6 +1020,27 @@ def G_FNEARBYINT : GenericInstruction {
// it is modeled as a side effect, because constrained intrinsics use the same
// method.
+// Reading floating-point environment.
+def G_GET_FPENV : GenericInstruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins);
+ let hasSideEffects = true;
+}
+
+// Setting floating-point environment.
+def G_SET_FPENV : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$src);
+ let hasSideEffects = true;
+}
+
+// Setting default floating-point environment.
+def G_RESET_FPENV : GenericInstruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
+ let hasSideEffects = true;
+}
+
// Reading floating-point control modes.
def G_GET_FPMODE : GenericInstruction {
let OutOperandList = (outs type0:$dst);
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
index 5e704f0b9a75..f792237203b4 100644
--- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
+++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td
@@ -116,6 +116,9 @@ def : GINodeEquiv<G_INTRINSIC, intrinsic_wo_chain> {
let IfConvergent = G_INTRINSIC_CONVERGENT;
}
+def : GINodeEquiv<G_GET_FPENV, get_fpenv>;
+def : GINodeEquiv<G_SET_FPENV, set_fpenv>;
+def : GINodeEquiv<G_RESET_FPENV, reset_fpenv>;
def : GINodeEquiv<G_GET_FPMODE, get_fpmode>;
def : GINodeEquiv<G_SET_FPMODE, set_fpmode>;
def : GINodeEquiv<G_RESET_FPMODE, reset_fpmode>;
diff --git a/llvm/include/llvm/Target/TargetPfmCounters.td b/llvm/include/llvm/Target/TargetPfmCounters.td
index b00f3e19c35f..33dff741fa2a 100644
--- a/llvm/include/llvm/Target/TargetPfmCounters.td
+++ b/llvm/include/llvm/Target/TargetPfmCounters.td
@@ -28,6 +28,24 @@ class PfmIssueCounter<string resource_name, string counter>
string ResourceName = resource_name;
}
+// Definition of a validation event. A validation event represents a specific
+// event that can be measured using performance counters that is interesting
+// in regard to the snippet state.
+class ValidationEvent <int event_number> {
+ int EventNumber = event_number;
+}
+
+def InstructionRetired : ValidationEvent<0>;
+
+// PfmValidationCounter provides a mapping between the events that are
+// interesting in regards to the snippet execution environment and
+// a concrete performance counter name that can be looked up in libpfm.
+class PfmValidationCounter<ValidationEvent event_type, string counter>
+ : PfmCounter<counter> {
+ // The name of the event that the validation counter detects.
+ ValidationEvent EventType = event_type;
+}
+
def NoPfmCounter : PfmCounter <""> {}
// Set of PfmCounters for measuring sched model characteristics.
@@ -38,6 +56,9 @@ class ProcPfmCounters {
PfmCounter UopsCounter = NoPfmCounter;
// Processors can define how to measure issued uops by defining IssueCounters.
list<PfmIssueCounter> IssueCounters = [];
+ // Processor can list mappings between validation events and real counters
+ // to measure the specified events.
+ list<PfmValidationCounter> ValidationCounters = [];
}
// A binding of a set of counters to a CPU.
diff --git a/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h b/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h
index 1e4187c6fb11..8ae553ca80dd 100644
--- a/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h
+++ b/llvm/include/llvm/TargetParser/ARMTargetParserCommon.h
@@ -42,6 +42,7 @@ struct ParsedBranchProtection {
StringRef Key;
bool BranchTargetEnforcement;
bool BranchProtectionPAuthLR;
+ bool GuardedControlStack;
};
bool parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 360fc594ef7c..b948eb6ebd12 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -539,10 +539,13 @@ void LazyValueInfoImpl::solve() {
}
std::pair<BasicBlock *, Value *> e = BlockValueStack.back();
assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
+ unsigned StackSize = BlockValueStack.size();
+ (void) StackSize;
if (solveBlockValue(e.second, e.first)) {
// The work item was completely processed.
- assert(BlockValueStack.back() == e && "Nothing should have been pushed!");
+ assert(BlockValueStack.size() == StackSize &&
+ BlockValueStack.back() == e && "Nothing should have been pushed!");
#ifndef NDEBUG
std::optional<ValueLatticeElement> BBLV =
TheCache.getCachedValueInfo(e.second, e.first);
@@ -556,7 +559,8 @@ void LazyValueInfoImpl::solve() {
BlockValueSet.erase(e);
} else {
// More work needs to be done before revisiting.
- assert(BlockValueStack.back() != e && "Stack should have been pushed!");
+ assert(BlockValueStack.size() == StackSize + 1 &&
+ "Exactly one element should have been pushed!");
}
}
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8fca569a391b..a5fc267b1883 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -459,9 +459,24 @@ public:
// Record all stack id indices actually used in the summary entries being
// written, so that we can compact them in the case of distributed ThinLTO
// indexes.
- for (auto &CI : FS->callsites())
+ for (auto &CI : FS->callsites()) {
+ // If the stack id list is empty, this callsite info was synthesized for
+ // a missing tail call frame. Ensure that the callee's GUID gets a value
+ // id. Normally we only generate these for defined summaries, which in
+      // the case of distributed ThinLTO are only the functions already defined
+ // in the module or that we want to import. We don't bother to include
+ // all the callee symbols as they aren't normally needed in the backend.
+ // However, for the synthesized callsite infos we do need the callee
+ // GUID in the backend so that we can correlate the identified callee
+ // with this callsite info (which for non-tail calls is done by the
+ // ordering of the callsite infos and verified via stack ids).
+ if (CI.StackIdIndices.empty()) {
+ GUIDToValueIdMap[CI.Callee.getGUID()] = ++GlobalValueId;
+ continue;
+ }
for (auto Idx : CI.StackIdIndices)
StackIdIndices.push_back(Idx);
+ }
for (auto &AI : FS->allocs())
for (auto &MIB : AI.MIBs)
for (auto Idx : MIB.StackIdIndices)
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
index ad3ad9928987..eb372655e5f1 100644
--- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
+++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp
@@ -1,3 +1,11 @@
+//===-- AssignmentTrackingAnalysis.cpp ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "LiveDebugValues/LiveDebugValues.h"
#include "llvm/ADT/BitVector.h"
@@ -2553,6 +2561,32 @@ static void analyzeFunction(Function &Fn, const DataLayout &Layout,
}
}
+FunctionVarLocs
+DebugAssignmentTrackingAnalysis::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ if (!isAssignmentTrackingEnabled(*F.getParent()))
+ return FunctionVarLocs();
+
+ auto &DL = F.getParent()->getDataLayout();
+
+ FunctionVarLocsBuilder Builder;
+ analyzeFunction(F, DL, &Builder);
+
+ // Save these results.
+ FunctionVarLocs Results;
+ Results.init(Builder);
+ return Results;
+}
+
+AnalysisKey DebugAssignmentTrackingAnalysis::Key;
+
+PreservedAnalyses
+DebugAssignmentTrackingPrinterPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ FAM.getResult<DebugAssignmentTrackingAnalysis>(F).print(OS, F);
+ return PreservedAnalyses::all();
+}
+
bool AssignmentTrackingAnalysis::runOnFunction(Function &F) {
if (!isAssignmentTrackingEnabled(*F.getParent()))
return false;
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 0801296cab49..599b7c72b2f5 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1363,6 +1363,14 @@ ReoptimizeBlock:
MachineBasicBlock *Pred = *(MBB->pred_end()-1);
Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough);
}
+    // Add the remaining successors of MBB to the successors of
+    // FallThrough. Such successors are not directly reachable via MBB,
+    // so they must be landing pads.
+ for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI)
+ if (*SI != &*FallThrough && !FallThrough->isSuccessor(*SI)) {
+ assert((*SI)->isEHPad() && "Bad CFG");
+ FallThrough->copySuccessor(MBB, SI);
+ }
// If MBB was the target of a jump table, update jump tables to go to the
// fallthrough instead.
if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo())
@@ -1624,6 +1632,15 @@ ReoptimizeBlock:
} else {
DidChange = true;
PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB);
+        // Add the remaining successors of MBB to the successors of CurTBB.
+        // Such successors are not directly reachable via MBB, so they must
+        // be landing pads.
+ for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE;
+ ++SI)
+ if (*SI != CurTBB && !CurTBB->isSuccessor(*SI)) {
+ assert((*SI)->isEHPad() && "Bad CFG");
+ CurTBB->copySuccessor(MBB, SI);
+ }
// If this change resulted in PMBB ending in a conditional
// branch where both conditions go to the same destination,
// change this to an unconditional branch.
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 8b15bdb0aca3..fc2793bd7a13 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6548,6 +6548,87 @@ bool CombinerHelper::tryFoldBoolSelectToLogic(GSelect *Select,
return false;
}
+bool CombinerHelper::tryFoldSelectToIntMinMax(GSelect *Select,
+ BuildFnTy &MatchInfo) {
+ Register DstReg = Select->getReg(0);
+ Register Cond = Select->getCondReg();
+ Register True = Select->getTrueReg();
+ Register False = Select->getFalseReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+  // We need a G_ICMP on the condition register.
+ GICmp *Cmp = getOpcodeDef<GICmp>(Cond, MRI);
+ if (!Cmp)
+ return false;
+
+ // We want to fold the icmp and replace the select.
+ if (!MRI.hasOneNonDBGUse(Cmp->getReg(0)))
+ return false;
+
+ CmpInst::Predicate Pred = Cmp->getCond();
+  // We need an ordering (greater-than or less-than) predicate for
+  // canonicalization; equality predicates cannot form a min/max.
+ if (CmpInst::isEquality(Pred))
+ return false;
+
+ Register CmpLHS = Cmp->getLHSReg();
+ Register CmpRHS = Cmp->getRHSReg();
+
+  // We can swap CmpLHS and CmpRHS for a higher hit rate.
+ if (True == CmpRHS && False == CmpLHS) {
+ std::swap(CmpLHS, CmpRHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+  // (icmp X, Y) ? X : Y -> integer min/max.
+  // See matchSelectPattern in ValueTracking.
+  // Legality of G_SELECT and of the integer min/max opcodes can differ.
+ if (True == CmpLHS && False == CmpRHS) {
+ switch (Pred) {
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMAX, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildUMax(DstReg, True, False);
+ };
+ return true;
+ }
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMAX, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSMax(DstReg, True, False);
+ };
+ return true;
+ }
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_UMIN, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildUMin(DstReg, True, False);
+ };
+ return true;
+ }
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE: {
+ if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SMIN, DstTy}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSMin(DstReg, True, False);
+ };
+ return true;
+ }
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
+
bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
GSelect *Select = cast<GSelect>(&MI);
@@ -6557,5 +6638,8 @@ bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) {
if (tryFoldBoolSelectToLogic(Select, MatchInfo))
return true;
+ if (tryFoldSelectToIntMinMax(Select, MatchInfo))
+ return true;
+
return false;
}
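As a sketch, the predicate-to-opcode mapping this combine implements reduces to the helper below; minMaxOpcodeFor is a hypothetical name, not part of CombinerHelper, and the headers are the usual LLVM ones:

    #include <optional>
    #include "llvm/CodeGen/TargetOpcodes.h"
    #include "llvm/IR/InstrTypes.h"

    // Maps an ordering predicate to the corresponding generic min/max
    // opcode; equality (and other) predicates yield no mapping.
    static std::optional<unsigned> minMaxOpcodeFor(llvm::CmpInst::Predicate P) {
      using llvm::CmpInst;
      switch (P) {
      case CmpInst::ICMP_UGT:
      case CmpInst::ICMP_UGE:
        return llvm::TargetOpcode::G_UMAX; // (icmp ugt/uge X, Y) ? X : Y
      case CmpInst::ICMP_SGT:
      case CmpInst::ICMP_SGE:
        return llvm::TargetOpcode::G_SMAX;
      case CmpInst::ICMP_ULT:
      case CmpInst::ICMP_ULE:
        return llvm::TargetOpcode::G_UMIN;
      case CmpInst::ICMP_SLT:
      case CmpInst::ICMP_SLE:
        return llvm::TargetOpcode::G_SMIN;
      default:
        return std::nullopt;
      }
    }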
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 6708f2baa5ed..8a6bfdc5ee66 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1919,6 +1919,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
return TargetOpcode::G_LROUND;
case Intrinsic::llround:
return TargetOpcode::G_LLROUND;
+ case Intrinsic::get_fpenv:
+ return TargetOpcode::G_GET_FPENV;
case Intrinsic::get_fpmode:
return TargetOpcode::G_GET_FPMODE;
}
@@ -2502,6 +2504,16 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
}
+ case Intrinsic::set_fpenv: {
+ Value *FPEnv = CI.getOperand(0);
+ MIRBuilder.buildInstr(TargetOpcode::G_SET_FPENV, {},
+ {getOrCreateVReg(*FPEnv)});
+ return true;
+ }
+ case Intrinsic::reset_fpenv: {
+ MIRBuilder.buildInstr(TargetOpcode::G_RESET_FPENV, {}, {});
+ return true;
+ }
case Intrinsic::set_fpmode: {
Value *FPState = CI.getOperand(0);
MIRBuilder.buildInstr(TargetOpcode::G_SET_FPMODE, {},
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index def7f6ebeb01..21947a55874a 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -958,6 +958,13 @@ static RTLIB::Libcall
getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
RTLIB::Libcall RTLibcall;
switch (MI.getOpcode()) {
+ case TargetOpcode::G_GET_FPENV:
+ RTLibcall = RTLIB::FEGETENV;
+ break;
+ case TargetOpcode::G_SET_FPENV:
+ case TargetOpcode::G_RESET_FPENV:
+ RTLibcall = RTLIB::FESETENV;
+ break;
case TargetOpcode::G_GET_FPMODE:
RTLibcall = RTLIB::FEGETMODE;
break;
@@ -1232,18 +1239,21 @@ LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
MI.eraseFromParent();
return Result;
}
+ case TargetOpcode::G_GET_FPENV:
case TargetOpcode::G_GET_FPMODE: {
LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
if (Result != Legalized)
return Result;
break;
}
+ case TargetOpcode::G_SET_FPENV:
case TargetOpcode::G_SET_FPMODE: {
LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
if (Result != Legalized)
return Result;
break;
}
+ case TargetOpcode::G_RESET_FPENV:
case TargetOpcode::G_RESET_FPMODE: {
LegalizeResult Result =
createResetStateLibcall(MIRBuilder, MI, LocObserver);
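In C terms, the libcall choices above amount to the standard <cfenv> round trip below (illustrative only; the reset path presumably supplies the default environment, which is what FE_DFL_ENV expresses):

    #include <cfenv>

    void fpenvRoundTrip() {
      fenv_t Env;
      fegetenv(&Env);       // G_GET_FPENV   -> RTLIB::FEGETENV
      fesetenv(&Env);       // G_SET_FPENV   -> RTLIB::FESETENV
      fesetenv(FE_DFL_ENV); // G_RESET_FPENV -> RTLIB::FESETENV with defaults
    }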
diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
index 55984423e5bc..ae58e135931f 100644
--- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -58,18 +59,18 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
return InsertMBB == Def.getParent();
}
-bool Localizer::isNonUniquePhiValue(MachineOperand &Op) const {
- MachineInstr *MI = Op.getParent();
- if (!MI->isPHI())
- return false;
+unsigned Localizer::getNumPhiUses(MachineOperand &Op) const {
+ auto *MI = dyn_cast<GPhi>(&*Op.getParent());
+ if (!MI)
+ return 0;
Register SrcReg = Op.getReg();
- for (unsigned Idx = 1; Idx < MI->getNumOperands(); Idx += 2) {
- auto &MO = MI->getOperand(Idx);
- if (&MO != &Op && MO.isReg() && MO.getReg() == SrcReg)
- return true;
+ unsigned NumUses = 0;
+ for (unsigned I = 0, NumVals = MI->getNumIncomingValues(); I < NumVals; ++I) {
+ if (MI->getIncomingValue(I) == SrcReg)
+ ++NumUses;
}
- return false;
+ return NumUses;
}
bool Localizer::localizeInterBlock(MachineFunction &MF,
@@ -108,11 +109,12 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
continue;
}
- // If the use is a phi operand that's not unique, don't try to localize.
- // If we do, we can cause unnecessary instruction bloat by duplicating
- // into each predecessor block, when the existing one is sufficient and
- // allows for easier optimization later.
- if (isNonUniquePhiValue(MOUse))
+    // PHIs look like a single user but can use the same register on
+    // multiple incoming edges, causing a remat into each predecessor.
+    // Allow this up to a certain threshold.
+ unsigned NumPhiUses = getNumPhiUses(MOUse);
+ const unsigned PhiThreshold = 2; // FIXME: Tune this more.
+ if (NumPhiUses > PhiThreshold)
continue;
LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
@@ -164,19 +166,22 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
if (!UseMI.isPHI())
Users.insert(&UseMI);
}
- // If all the users were PHIs then they're not going to be in our block,
- // don't try to move this instruction.
- if (Users.empty())
- continue;
-
MachineBasicBlock::iterator II(MI);
- ++II;
- while (II != MBB.end() && !Users.count(&*II))
+    // If all the users were PHIs then they're not going to be in our block,
+    // but we may still benefit from sinking, especially since the value
+    // might be live across a call.
+ if (Users.empty()) {
+ // Make sure we don't sink in between two terminator sequences by scanning
+ // forward, not backward.
+ II = MBB.getFirstTerminatorForward();
+ LLVM_DEBUG(dbgs() << "Only phi users: moving inst to end: " << *MI);
+ } else {
++II;
-
- assert(II != MBB.end() && "Didn't find the user in the MBB");
- LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II
- << '\n');
+ while (II != MBB.end() && !Users.count(&*II))
+ ++II;
+ assert(II != MBB.end() && "Didn't find the user in the MBB");
+ LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *II);
+ }
MI->removeFromParent();
MBB.insert(II, MI);
diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 9037f752dc4f..cfc8c28b99e5 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -2403,8 +2403,15 @@ bool InstrRefBasedLDV::mlocJoin(
llvm::sort(BlockOrders, Cmp);
// Skip entry block.
- if (BlockOrders.size() == 0)
+ if (BlockOrders.size() == 0) {
+    // FIXME: We don't assert here, to keep instr-ref-unreachable.mir
+    // passing.
+    LLVM_DEBUG(if (!MBB.isEntryBlock()) dbgs()
+               << "Found block " << MBB.getFullName()
+               << " unreachable from entry, which may lead to an "
+                  "out-of-bounds access to VarLocs\n");
return false;
+ }
// Step through all machine locations, look at each predecessor and test
// whether we can eliminate redundant PHIs.
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2327664516cc..ecdf9ab9e989 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4380,7 +4380,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
} else {
N1IsConst = isa<ConstantSDNode>(N1);
if (N1IsConst) {
- ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
+ ConstValue1 = N1->getAsAPIntVal();
N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
}
}
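The recurring cleanup in this file swaps cast<ConstantSDNode>(N)->getAPIntValue() for N->getAsAPIntVal(); as a sketch, the new accessor is morally equivalent to:

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // Assumed shape of the convenience accessor: it asserts the node is a
    // ConstantSDNode and returns its value.
    const llvm::APInt &getAsAPIntValSketch(const llvm::SDNode *N) {
      return llvm::cast<llvm::ConstantSDNode>(N)->getAPIntValue();
    }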
@@ -10999,8 +10999,8 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDLoc DL(N);
// fold (bswap c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::BSWAP, DL, VT, N0);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::BSWAP, DL, VT, {N0}))
+ return C;
// fold (bswap (bswap x)) -> x
if (N0.getOpcode() == ISD::BSWAP)
return N0.getOperand(0);
@@ -11059,10 +11059,11 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (bitreverse c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::BITREVERSE, DL, VT, {N0}))
+ return C;
// fold (bitreverse (bitreverse x)) -> x
if (N0.getOpcode() == ISD::BITREVERSE)
return N0.getOperand(0);
@@ -11072,16 +11073,16 @@ SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (ctlz c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTLZ, DL, VT, {N0}))
+ return C;
// If the value is known never to be zero, switch to the undef version.
- if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT))
if (DAG.isKnownNeverZero(N0))
- return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
- }
+ return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, DL, VT, N0);
return SDValue();
}
@@ -11089,26 +11090,28 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) {
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (ctlz_zero_undef c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::CTLZ_ZERO_UNDEF, DL, VT, {N0}))
+ return C;
return SDValue();
}
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (cttz c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTTZ, DL, VT, {N0}))
+ return C;
// If the value is known never to be zero, switch to the undef version.
- if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
+ if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT))
if (DAG.isKnownNeverZero(N0))
- return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
- }
+ return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, DL, VT, N0);
return SDValue();
}
@@ -11116,20 +11119,23 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) {
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (cttz_zero_undef c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
+ if (SDValue C =
+ DAG.FoldConstantArithmetic(ISD::CTTZ_ZERO_UNDEF, DL, VT, {N0}))
+ return C;
return SDValue();
}
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
// fold (ctpop c1) -> c2
- if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
- return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
+ if (SDValue C = DAG.FoldConstantArithmetic(ISD::CTPOP, DL, VT, {N0}))
+ return C;
return SDValue();
}
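For scalar constants, the FoldConstantArithmetic calls above reduce to plain APInt computations; an illustrative sketch (values checked by hand):

    #include "llvm/ADT/APInt.h"

    void constantFoldExamples() {
      llvm::APInt C(32, 0x11223344);
      llvm::APInt Swapped = C.byteSwap();     // 0x44332211, as (bswap c1)
      llvm::APInt Reversed = C.reverseBits(); // as (bitreverse c1)
      unsigned LZ = C.countl_zero();          // 3, as (ctlz c1)
      unsigned Pop = C.popcount();            // 10, as (ctpop c1)
    }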
@@ -12087,8 +12093,8 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
if (N1Elt.getValueType() != N2Elt.getValueType())
continue;
- const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
- const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
+ const APInt &C1 = N1Elt->getAsAPIntVal();
+ const APInt &C2 = N2Elt->getAsAPIntVal();
if (C1 != C2 + 1)
AllAddOne = false;
if (C1 != C2 - 1)
@@ -12764,7 +12770,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
SDLoc DL(Op);
// Get the constant value and if needed trunc it to the size of the type.
// Nodes like build_vector might have constants wider than the scalar type.
- APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
+ APInt C = Op->getAsAPIntVal().zextOrTrunc(EVTBits);
if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
else
@@ -13375,9 +13381,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
SDValue N00 = N0.getOperand(0);
EVT ExtVT = cast<VTSDNode>(N0->getOperand(1))->getVT();
- if (N00.getOpcode() == ISD::TRUNCATE &&
+ if ((N00.getOpcode() == ISD::TRUNCATE || TLI.isTruncateFree(N00, ExtVT)) &&
(!LegalTypes || TLI.isTypeLegal(ExtVT))) {
- SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00.getOperand(0));
+ SDValue T = DAG.getNode(ISD::TRUNCATE, DL, ExtVT, N00);
return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, T);
}
}
@@ -17942,10 +17948,10 @@ SDValue DAGCombiner::rebuildSetCC(SDValue N) {
SDValue AndOp1 = Op0.getOperand(1);
if (AndOp1.getOpcode() == ISD::Constant) {
- const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
+ const APInt &AndConst = AndOp1->getAsAPIntVal();
if (AndConst.isPowerOf2() &&
- cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
+ Op1->getAsAPIntVal() == AndConst.logBase2()) {
SDLoc DL(N);
return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
Op0, DAG.getConstant(0, DL, Op0.getValueType()),
@@ -18266,7 +18272,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
auto *CN = cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
const APInt &Offset0 = CN->getAPIntValue();
- const APInt &Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
+ const APInt &Offset1 = Offset->getAsAPIntVal();
int X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
int Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
int X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
@@ -19573,7 +19579,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
// Find the type to narrow it the load / op / store to.
SDValue N1 = Value.getOperand(1);
unsigned BitWidth = N1.getValueSizeInBits();
- APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ APInt Imm = N1->getAsAPIntVal();
if (Opc == ISD::AND)
Imm ^= APInt::getAllOnes(BitWidth);
if (Imm == 0 || Imm.isAllOnes())
@@ -26542,10 +26548,10 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
}
APInt Bits;
- if (isa<ConstantSDNode>(Elt))
- Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
- else if (isa<ConstantFPSDNode>(Elt))
- Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
+ if (auto *Cst = dyn_cast<ConstantSDNode>(Elt))
+ Bits = Cst->getAPIntValue();
+ else if (auto *CstFP = dyn_cast<ConstantFPSDNode>(Elt))
+ Bits = CstFP->getValueAPF().bitcastToAPInt();
else
return SDValue();
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index ec74d2940099..c278bdc07360 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1854,7 +1854,7 @@ void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
// Hi = Lo + (EltCnt * Step)
EVT EltVT = Step.getValueType();
- APInt StepVal = cast<ConstantSDNode>(Step)->getAPIntValue();
+ APInt StepVal = Step->getAsAPIntVal();
SDValue StartOfHi =
DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements());
StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index b39be64c06f9..01d31806c844 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -327,7 +327,7 @@ bool ISD::isVectorShrinkable(const SDNode *N, unsigned NewEltSize,
if (!isa<ConstantSDNode>(Op))
return false;
- APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().trunc(EltSize);
+ APInt C = Op->getAsAPIntVal().trunc(EltSize);
if (Signed && C.trunc(NewEltSize).sext(EltSize) != C)
return false;
if (!Signed && C.trunc(NewEltSize).zext(EltSize) != C)
@@ -7200,7 +7200,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
(N2VT.getVectorMinNumElements() + N3->getAsZExtVal()) <=
VT.getVectorMinNumElements()) &&
"Insert subvector overflow!");
- assert(cast<ConstantSDNode>(N3)->getAPIntValue().getBitWidth() ==
+ assert(N3->getAsAPIntVal().getBitWidth() ==
TLI->getVectorIdxTy(getDataLayout()).getFixedSizeInBits() &&
"Constant index for INSERT_SUBVECTOR has an invalid size");
@@ -9304,7 +9304,7 @@ SDValue SelectionDAG::getGatherVP(SDVTList VTs, EVT VT, const SDLoc &dl,
N->getValueType(0).getVectorElementCount()) &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
- cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ N->getScale()->getAsAPIntVal().isPowerOf2() &&
"Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
@@ -9348,7 +9348,7 @@ SDValue SelectionDAG::getScatterVP(SDVTList VTs, EVT VT, const SDLoc &dl,
N->getValue().getValueType().getVectorElementCount()) &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
- cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ N->getScale()->getAsAPIntVal().isPowerOf2() &&
"Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
@@ -9490,7 +9490,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT MemVT, const SDLoc &dl,
N->getValueType(0).getVectorElementCount()) &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
- cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ N->getScale()->getAsAPIntVal().isPowerOf2() &&
"Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
@@ -9536,7 +9536,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT MemVT, const SDLoc &dl,
N->getValue().getValueType().getVectorElementCount()) &&
"Vector width mismatch between index and data");
assert(isa<ConstantSDNode>(N->getScale()) &&
- cast<ConstantSDNode>(N->getScale())->getAPIntValue().isPowerOf2() &&
+ N->getScale()->getAsAPIntVal().isPowerOf2() &&
"Scale should be a constant power of 2");
CSEMap.InsertNode(N, IP);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 9acfc76d7d5e..678d273e4bd6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2697,9 +2697,14 @@ LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildSame(
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
-CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- const SelectionDAGISel &SDISel, bool TwoBytePredNo) {
- unsigned PredNo = MatcherTable[MatcherIndex++];
+CheckPatternPredicate(unsigned Opcode, const unsigned char *MatcherTable,
+ unsigned &MatcherIndex, const SelectionDAGISel &SDISel) {
+ bool TwoBytePredNo =
+ Opcode == SelectionDAGISel::OPC_CheckPatternPredicateTwoByte;
+ unsigned PredNo =
+ TwoBytePredNo || Opcode == SelectionDAGISel::OPC_CheckPatternPredicate
+ ? MatcherTable[MatcherIndex++]
+ : Opcode - SelectionDAGISel::OPC_CheckPatternPredicate0;
if (TwoBytePredNo)
PredNo |= MatcherTable[MatcherIndex++] << 8;
return SDISel.CheckPatternPredicate(PredNo);
@@ -2851,10 +2856,16 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
return Index;
case SelectionDAGISel::OPC_CheckPatternPredicate:
+ case SelectionDAGISel::OPC_CheckPatternPredicate0:
+ case SelectionDAGISel::OPC_CheckPatternPredicate1:
case SelectionDAGISel::OPC_CheckPatternPredicate2:
- Result = !::CheckPatternPredicate(
- Table, Index, SDISel,
- Table[Index - 1] == SelectionDAGISel::OPC_CheckPatternPredicate2);
+ case SelectionDAGISel::OPC_CheckPatternPredicate3:
+ case SelectionDAGISel::OPC_CheckPatternPredicate4:
+ case SelectionDAGISel::OPC_CheckPatternPredicate5:
+ case SelectionDAGISel::OPC_CheckPatternPredicate6:
+ case SelectionDAGISel::OPC_CheckPatternPredicate7:
+ case SelectionDAGISel::OPC_CheckPatternPredicateTwoByte:
+ Result = !::CheckPatternPredicate(Opcode, Table, Index, SDISel);
return Index;
case SelectionDAGISel::OPC_CheckPredicate:
Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
@@ -3336,9 +3347,16 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
case OPC_CheckPatternPredicate:
+ case OPC_CheckPatternPredicate0:
+ case OPC_CheckPatternPredicate1:
case OPC_CheckPatternPredicate2:
- if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this,
- Opcode == OPC_CheckPatternPredicate2))
+ case OPC_CheckPatternPredicate3:
+ case OPC_CheckPatternPredicate4:
+ case OPC_CheckPatternPredicate5:
+ case OPC_CheckPatternPredicate6:
+ case OPC_CheckPatternPredicate7:
+ case OPC_CheckPatternPredicateTwoByte:
+ if (!::CheckPatternPredicate(Opcode, MatcherTable, MatcherIndex, *this))
break;
continue;
case OPC_CheckPredicate:
@@ -3358,8 +3376,18 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
break;
continue;
}
- case OPC_CheckComplexPat: {
- unsigned CPNum = MatcherTable[MatcherIndex++];
+ case OPC_CheckComplexPat:
+ case OPC_CheckComplexPat0:
+ case OPC_CheckComplexPat1:
+ case OPC_CheckComplexPat2:
+ case OPC_CheckComplexPat3:
+ case OPC_CheckComplexPat4:
+ case OPC_CheckComplexPat5:
+ case OPC_CheckComplexPat6:
+ case OPC_CheckComplexPat7: {
+ unsigned CPNum = Opcode == OPC_CheckComplexPat
+ ? MatcherTable[MatcherIndex++]
+ : Opcode - OPC_CheckComplexPat0;
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
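Both hunks above assume the same compact encoding, sketched here with illustrative names: numbers 0-7 travel in the opcode itself, while the generic opcode keeps reading its number from the table:

    static unsigned decodeInlineNumber(unsigned Opcode, unsigned GenericOpc,
                                       unsigned Opc0,
                                       const unsigned char *Table,
                                       unsigned &Index) {
      // OPC_Foo reads the number from the table; OPC_Foo0..OPC_Foo7
      // encode it as the distance from OPC_Foo0.
      return Opcode == GenericOpc ? Table[Index++] : Opcode - Opc0;
    }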
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e3e3e375d6a6..3bbef6e6d85d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1108,7 +1108,7 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
- Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
+ Known = KnownBits::makeConstant(Op->getAsAPIntVal());
return false;
}
@@ -6350,8 +6350,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
LeadingZeros = DAG.computeKnownBits(N0).countMinLeadingZeros();
// UnsignedDivisionByConstantInfo doesn't work correctly if leading zeros in
// the dividend exceeds the leading zeros for the divisor.
- LeadingZeros = std::min(
- LeadingZeros, cast<ConstantSDNode>(N1)->getAPIntValue().countl_zero());
+ LeadingZeros = std::min(LeadingZeros, N1->getAsAPIntVal().countl_zero());
}
bool UseNPQ = false, UsePreShift = false, UsePostShift = false;
diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 6e69dc66429d..a69b71451736 100644
--- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1669,9 +1669,18 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ StringRef Name = GO->getSection();
+ if (Name == getInstrProfSectionName(IPSK_covmap, Triple::COFF,
+ /*AddSegmentInfo=*/false) ||
+ Name == getInstrProfSectionName(IPSK_covfun, Triple::COFF,
+ /*AddSegmentInfo=*/false) ||
+ Name == getInstrProfSectionName(IPSK_covdata, Triple::COFF,
+ /*AddSegmentInfo=*/false) ||
+ Name == getInstrProfSectionName(IPSK_covname, Triple::COFF,
+ /*AddSegmentInfo=*/false))
+ Kind = SectionKind::getMetadata();
int Selection = 0;
unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
- StringRef Name = GO->getSection();
StringRef COMDATSymName = "";
if (GO->hasComdat()) {
Selection = getSelectionForCOFF(GO);
diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
index 8d76c3bcf672..ac2c26e52240 100644
--- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
+++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/NonRelocatableStringpool.h"
#include "llvm/DWARFLinker/Classic/DWARFLinkerDeclContext.h"
#include "llvm/DWARFLinker/Classic/DWARFStreamer.h"
+#include "llvm/DWARFLinker/Utils.h"
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
@@ -176,20 +177,6 @@ static void resolveRelativeObjectPath(SmallVectorImpl<char> &Buf, DWARFDie CU) {
sys::path::append(Buf, dwarf::toString(CU.find(dwarf::DW_AT_comp_dir), ""));
}
-/// Make a best effort to guess the
-/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path.
-static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) {
- SmallString<128> Result;
- // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
- StringRef Base = sys::path::parent_path(SysRoot);
- if (sys::path::filename(Base) != "SDKs")
- return Result;
- Base = sys::path::parent_path(Base);
- Result = Base;
- Result += "/Toolchains";
- return Result;
-}
-
/// Collect references to parseable Swift interfaces in imported
/// DW_TAG_module blocks.
static void analyzeImportedModule(
diff --git a/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp
index 3af574c70561..9af222354551 100644
--- a/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp
+++ b/llvm/lib/DWARFLinker/Parallel/AcceleratorRecordsSaver.cpp
@@ -7,7 +7,7 @@
//===----------------------------------------------------------------------===//
#include "AcceleratorRecordsSaver.h"
-#include "Utils.h"
+#include "llvm/DWARFLinker/Utils.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/Support/DJB.h"
diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp
index ffcf9f365aec..6ed284a66a85 100644
--- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp
+++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerCompileUnit.cpp
@@ -12,6 +12,7 @@
#include "DIEGenerator.h"
#include "DependencyTracker.h"
#include "SyntheticTypeNameBuilder.h"
+#include "llvm/DWARFLinker/Utils.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
#include "llvm/Support/DJB.h"
@@ -247,20 +248,6 @@ void CompileUnit::cleanupDataAfterClonning() {
getOrigUnit().clear();
}
-/// Make a best effort to guess the
-/// Xcode.app/Contents/Developer/Toolchains/ path from an SDK path.
-static SmallString<128> guessToolchainBaseDir(StringRef SysRoot) {
- SmallString<128> Result;
- // Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk
- StringRef Base = sys::path::parent_path(SysRoot);
- if (sys::path::filename(Base) != "SDKs")
- return Result;
- Base = sys::path::parent_path(Base);
- Result = Base;
- Result += "/Toolchains";
- return Result;
-}
-
/// Collect references to parseable Swift interfaces in imported
/// DW_TAG_module blocks.
void CompileUnit::analyzeImportedModule(const DWARFDebugInfoEntry *DieEntry) {
@@ -1698,14 +1685,6 @@ CompileUnit::getDirAndFilenameFromLineTable(
return getDirAndFilenameFromLineTable(FileIdx);
}
-static bool isPathAbsoluteOnWindowsOrPosix(const Twine &Path) {
- // Debug info can contain paths from any OS, not necessarily
- // an OS we're currently running on. Moreover different compilation units can
- // be compiled on different operating systems and linked together later.
- return sys::path::is_absolute(Path, sys::path::Style::posix) ||
- sys::path::is_absolute(Path, sys::path::Style::windows);
-}
-
std::optional<std::pair<StringRef, StringRef>>
CompileUnit::getDirAndFilenameFromLineTable(uint64_t FileIdx) {
FileNamesCache::iterator FileData = FileNames.find(FileIdx);
diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp
index bb59cbfdb347..b0b819cf9778 100644
--- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp
+++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp
@@ -9,7 +9,7 @@
#include "DWARFLinkerImpl.h"
#include "DIEGenerator.h"
#include "DependencyTracker.h"
-#include "Utils.h"
+#include "llvm/DWARFLinker/Utils.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/Parallel.h"
diff --git a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h
index 545d04cfbe43..1839164dcec1 100644
--- a/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h
+++ b/llvm/lib/DWARFLinker/Parallel/DebugLineSectionEmitter.h
@@ -193,24 +193,39 @@ private:
Section.emitString(Include.getForm(), *IncludeStr);
}
+ bool HasChecksums = P.ContentTypes.HasMD5;
+ bool HasInlineSources = P.ContentTypes.HasSource;
+
+ dwarf::Form FileNameForm = dwarf::DW_FORM_string;
+ dwarf::Form LLVMSourceForm = dwarf::DW_FORM_string;
+
if (P.FileNames.empty()) {
// file_name_entry_format_count (ubyte).
Section.emitIntVal(0, 1);
} else {
+ FileNameForm = P.FileNames[0].Name.getForm();
+ LLVMSourceForm = P.FileNames[0].Source.getForm();
+
// file_name_entry_format_count (ubyte).
- Section.emitIntVal(2 + (P.ContentTypes.HasMD5 ? 1 : 0), 1);
+ Section.emitIntVal(
+ 2 + (HasChecksums ? 1 : 0) + (HasInlineSources ? 1 : 0), 1);
// file_name_entry_format (sequence of ULEB128 pairs).
encodeULEB128(dwarf::DW_LNCT_path, Section.OS);
- encodeULEB128(P.FileNames[0].Name.getForm(), Section.OS);
+ encodeULEB128(FileNameForm, Section.OS);
encodeULEB128(dwarf::DW_LNCT_directory_index, Section.OS);
encodeULEB128(dwarf::DW_FORM_data1, Section.OS);
- if (P.ContentTypes.HasMD5) {
+ if (HasChecksums) {
encodeULEB128(dwarf::DW_LNCT_MD5, Section.OS);
encodeULEB128(dwarf::DW_FORM_data16, Section.OS);
}
+
+ if (HasInlineSources) {
+ encodeULEB128(dwarf::DW_LNCT_LLVM_source, Section.OS);
+ encodeULEB128(LLVMSourceForm, Section.OS);
+ }
}
// file_names_count (ULEB128).
@@ -226,14 +241,27 @@ private:
// A null-terminated string containing the full or relative path name of a
// source file.
- Section.emitString(File.Name.getForm(), *FileNameStr);
+ Section.emitString(FileNameForm, *FileNameStr);
Section.emitIntVal(File.DirIdx, 1);
- if (P.ContentTypes.HasMD5) {
+ if (HasChecksums) {
+ assert((File.Checksum.size() == 16) &&
+ "checksum size is not equal to 16 bytes.");
Section.emitBinaryData(
StringRef(reinterpret_cast<const char *>(File.Checksum.data()),
File.Checksum.size()));
}
+
+ if (HasInlineSources) {
+ std::optional<const char *> FileSourceStr =
+ dwarf::toString(File.Source);
+ if (!FileSourceStr) {
+        U.warn("can't read string from line table.");
+ return;
+ }
+
+ Section.emitString(LLVMSourceForm, *FileSourceStr);
+ }
}
}
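For reference, with both checksums and inline sources present, each DWARF v5 file entry is now described by four (content type, form) pairs:

    DW_LNCT_path             FileNameForm   (form of the first file's name)
    DW_LNCT_directory_index  DW_FORM_data1
    DW_LNCT_MD5              DW_FORM_data16
    DW_LNCT_LLVM_source      LLVMSourceForm (form of the first file's source)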
diff --git a/llvm/lib/DWARFLinker/Utils.cpp b/llvm/lib/DWARFLinker/Utils.cpp
index e8b0fe303aae..52508c998532 100644
--- a/llvm/lib/DWARFLinker/Utils.cpp
+++ b/llvm/lib/DWARFLinker/Utils.cpp
@@ -5,3 +5,5 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
+
+#include "llvm/DWARFLinker/Utils.h"
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index aeaca21a99cc..b6ad85b2d46e 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -96,6 +96,7 @@
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
+#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
@@ -6031,6 +6032,16 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"Value for inactive lanes must be a VGPR function argument", &Call);
break;
}
+ case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32:
+ case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: {
+ Value *V = Call.getArgOperand(0);
+ unsigned RegCount = cast<ConstantInt>(V)->getZExtValue();
+ Check(RegCount % 8 == 0,
+          "reg_count argument to nvvm.setmaxnreg must be a multiple of 8");
+ Check((RegCount >= 24 && RegCount <= 256),
+ "reg_count argument to nvvm.setmaxnreg must be within [24, 256]");
+ break;
+ }
case Intrinsic::experimental_convergence_entry:
LLVM_FALLTHROUGH;
case Intrinsic::experimental_convergence_anchor:
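An illustrative restatement of the new rule (isValidMaxNRegCount is a hypothetical helper, not part of the Verifier):

    static bool isValidMaxNRegCount(unsigned RegCount) {
      // reg_count must be a multiple of 8 within [24, 256].
      return RegCount % 8 == 0 && RegCount >= 24 && RegCount <= 256;
    }
    // Accepted: 24, 32, ..., 256. Rejected: 20 (not a multiple of 8),
    // 16 (too small), 264 (too large).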
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 93e1d2f44b8c..d4c4bcb85648 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -616,12 +616,12 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) {
if (Mergeable)
if (parseMergeSize(Size))
return true;
- if (Group)
- if (parseGroup(GroupName, IsComdat))
- return true;
if (Flags & ELF::SHF_LINK_ORDER)
if (parseLinkedToSym(LinkedToSym))
return true;
+ if (Group)
+ if (parseGroup(GroupName, IsComdat))
+ return true;
if (maybeParseUniqueID(UniqueID))
return true;
}
diff --git a/llvm/lib/MC/MCSectionELF.cpp b/llvm/lib/MC/MCSectionELF.cpp
index 95fdf3352207..b1efb839ba75 100644
--- a/llvm/lib/MC/MCSectionELF.cpp
+++ b/llvm/lib/MC/MCSectionELF.cpp
@@ -90,8 +90,6 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << 'e';
if (Flags & ELF::SHF_EXECINSTR)
OS << 'x';
- if (Flags & ELF::SHF_GROUP)
- OS << 'G';
if (Flags & ELF::SHF_WRITE)
OS << 'w';
if (Flags & ELF::SHF_MERGE)
@@ -102,6 +100,8 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << 'T';
if (Flags & ELF::SHF_LINK_ORDER)
OS << 'o';
+ if (Flags & ELF::SHF_GROUP)
+ OS << 'G';
if (Flags & ELF::SHF_GNU_RETAIN)
OS << 'R';
@@ -183,13 +183,6 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << "," << EntrySize;
}
- if (Flags & ELF::SHF_GROUP) {
- OS << ",";
- printName(OS, Group.getPointer()->getName());
- if (isComdat())
- OS << ",comdat";
- }
-
if (Flags & ELF::SHF_LINK_ORDER) {
OS << ",";
if (LinkedToSym)
@@ -198,6 +191,13 @@ void MCSectionELF::printSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
OS << '0';
}
+ if (Flags & ELF::SHF_GROUP) {
+ OS << ",";
+ printName(OS, Group.getPointer()->getName());
+ if (isComdat())
+ OS << ",comdat";
+ }
+
if (isUnique())
OS << ",unique," << UniqueID;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 27bfe12127cc..bfc97d5464c0 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -72,6 +72,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
#include "llvm/CodeGen/CallBrPrepare.h"
#include "llvm/CodeGen/CodeGenPrepare.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index bda36bd8c107..0b53b59787dd 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -235,6 +235,7 @@ FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis())
FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis())
FUNCTION_ANALYSIS("cycles", CycleAnalysis())
FUNCTION_ANALYSIS("da", DependenceAnalysis())
+FUNCTION_ANALYSIS("debug-ata", DebugAssignmentTrackingAnalysis())
FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis())
FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis())
FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
@@ -384,6 +385,7 @@ FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
FUNCTION_PASS("print<cost-model>", CostModelPrinterPass(dbgs()))
FUNCTION_PASS("print<cycles>", CycleInfoPrinterPass(dbgs()))
FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
+FUNCTION_PASS("print<debug-ata>", DebugAssignmentTrackingPrinterPass(dbgs()))
FUNCTION_PASS("print<delinearization>", DelinearizationPrinterPass(dbgs()))
FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs()))
FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs()))
@@ -421,7 +423,7 @@ FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
-FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
+FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
FUNCTION_PASS("tsan", ThreadSanitizerPass())
FUNCTION_PASS("typepromotion", TypePromotionPass(TM))
FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass())
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 70f531e40b90..390d950486a7 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -24,16 +24,11 @@
using namespace llvm;
namespace {
-/// Represents the major and version number components of a RISC-V extension
-struct RISCVExtensionVersion {
- unsigned Major;
- unsigned Minor;
-};
struct RISCVSupportedExtension {
const char *Name;
/// Supported version.
- RISCVExtensionVersion Version;
+ RISCVISAInfo::ExtensionVersion Version;
bool operator<(const RISCVSupportedExtension &RHS) const {
return StringRef(Name) < StringRef(RHS.Name);
@@ -50,161 +45,161 @@ static const char *RISCVGImplications[] = {
// NOTE: This table should be sorted alphabetically by extension name.
static const RISCVSupportedExtension SupportedExtensions[] = {
- {"a", RISCVExtensionVersion{2, 1}},
- {"c", RISCVExtensionVersion{2, 0}},
- {"d", RISCVExtensionVersion{2, 2}},
- {"e", RISCVExtensionVersion{2, 0}},
- {"f", RISCVExtensionVersion{2, 2}},
- {"h", RISCVExtensionVersion{1, 0}},
- {"i", RISCVExtensionVersion{2, 1}},
- {"m", RISCVExtensionVersion{2, 0}},
+ {"a", {2, 1}},
+ {"c", {2, 0}},
+ {"d", {2, 2}},
+ {"e", {2, 0}},
+ {"f", {2, 2}},
+ {"h", {1, 0}},
+ {"i", {2, 1}},
+ {"m", {2, 0}},
- {"smaia", RISCVExtensionVersion{1, 0}},
- {"ssaia", RISCVExtensionVersion{1, 0}},
- {"svinval", RISCVExtensionVersion{1, 0}},
- {"svnapot", RISCVExtensionVersion{1, 0}},
- {"svpbmt", RISCVExtensionVersion{1, 0}},
+ {"smaia", {1, 0}},
+ {"ssaia", {1, 0}},
+ {"svinval", {1, 0}},
+ {"svnapot", {1, 0}},
+ {"svpbmt", {1, 0}},
- {"v", RISCVExtensionVersion{1, 0}},
+ {"v", {1, 0}},
// vendor-defined ('X') extensions
- {"xcvalu", RISCVExtensionVersion{1, 0}},
- {"xcvbi", RISCVExtensionVersion{1, 0}},
- {"xcvbitmanip", RISCVExtensionVersion{1, 0}},
- {"xcvelw", RISCVExtensionVersion{1, 0}},
- {"xcvmac", RISCVExtensionVersion{1, 0}},
- {"xcvmem", RISCVExtensionVersion{1, 0}},
- {"xcvsimd", RISCVExtensionVersion{1, 0}},
- {"xsfvcp", RISCVExtensionVersion{1, 0}},
- {"xsfvfnrclipxfqf", RISCVExtensionVersion{1, 0}},
- {"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}},
- {"xsfvqmaccdod", RISCVExtensionVersion{1, 0}},
- {"xsfvqmaccqoq", RISCVExtensionVersion{1, 0}},
- {"xtheadba", RISCVExtensionVersion{1, 0}},
- {"xtheadbb", RISCVExtensionVersion{1, 0}},
- {"xtheadbs", RISCVExtensionVersion{1, 0}},
- {"xtheadcmo", RISCVExtensionVersion{1, 0}},
- {"xtheadcondmov", RISCVExtensionVersion{1, 0}},
- {"xtheadfmemidx", RISCVExtensionVersion{1, 0}},
- {"xtheadmac", RISCVExtensionVersion{1, 0}},
- {"xtheadmemidx", RISCVExtensionVersion{1, 0}},
- {"xtheadmempair", RISCVExtensionVersion{1, 0}},
- {"xtheadsync", RISCVExtensionVersion{1, 0}},
- {"xtheadvdot", RISCVExtensionVersion{1, 0}},
- {"xventanacondops", RISCVExtensionVersion{1, 0}},
+ {"xcvalu", {1, 0}},
+ {"xcvbi", {1, 0}},
+ {"xcvbitmanip", {1, 0}},
+ {"xcvelw", {1, 0}},
+ {"xcvmac", {1, 0}},
+ {"xcvmem", {1, 0}},
+ {"xcvsimd", {1, 0}},
+ {"xsfvcp", {1, 0}},
+ {"xsfvfnrclipxfqf", {1, 0}},
+ {"xsfvfwmaccqqq", {1, 0}},
+ {"xsfvqmaccdod", {1, 0}},
+ {"xsfvqmaccqoq", {1, 0}},
+ {"xtheadba", {1, 0}},
+ {"xtheadbb", {1, 0}},
+ {"xtheadbs", {1, 0}},
+ {"xtheadcmo", {1, 0}},
+ {"xtheadcondmov", {1, 0}},
+ {"xtheadfmemidx", {1, 0}},
+ {"xtheadmac", {1, 0}},
+ {"xtheadmemidx", {1, 0}},
+ {"xtheadmempair", {1, 0}},
+ {"xtheadsync", {1, 0}},
+ {"xtheadvdot", {1, 0}},
+ {"xventanacondops", {1, 0}},
- {"zawrs", RISCVExtensionVersion{1, 0}},
+ {"zawrs", {1, 0}},
- {"zba", RISCVExtensionVersion{1, 0}},
- {"zbb", RISCVExtensionVersion{1, 0}},
- {"zbc", RISCVExtensionVersion{1, 0}},
- {"zbkb", RISCVExtensionVersion{1, 0}},
- {"zbkc", RISCVExtensionVersion{1, 0}},
- {"zbkx", RISCVExtensionVersion{1, 0}},
- {"zbs", RISCVExtensionVersion{1, 0}},
+ {"zba", {1, 0}},
+ {"zbb", {1, 0}},
+ {"zbc", {1, 0}},
+ {"zbkb", {1, 0}},
+ {"zbkc", {1, 0}},
+ {"zbkx", {1, 0}},
+ {"zbs", {1, 0}},
- {"zca", RISCVExtensionVersion{1, 0}},
- {"zcb", RISCVExtensionVersion{1, 0}},
- {"zcd", RISCVExtensionVersion{1, 0}},
- {"zce", RISCVExtensionVersion{1, 0}},
- {"zcf", RISCVExtensionVersion{1, 0}},
- {"zcmp", RISCVExtensionVersion{1, 0}},
- {"zcmt", RISCVExtensionVersion{1, 0}},
+ {"zca", {1, 0}},
+ {"zcb", {1, 0}},
+ {"zcd", {1, 0}},
+ {"zce", {1, 0}},
+ {"zcf", {1, 0}},
+ {"zcmp", {1, 0}},
+ {"zcmt", {1, 0}},
- {"zdinx", RISCVExtensionVersion{1, 0}},
+ {"zdinx", {1, 0}},
- {"zfa", RISCVExtensionVersion{1, 0}},
- {"zfh", RISCVExtensionVersion{1, 0}},
- {"zfhmin", RISCVExtensionVersion{1, 0}},
- {"zfinx", RISCVExtensionVersion{1, 0}},
+ {"zfa", {1, 0}},
+ {"zfh", {1, 0}},
+ {"zfhmin", {1, 0}},
+ {"zfinx", {1, 0}},
- {"zhinx", RISCVExtensionVersion{1, 0}},
- {"zhinxmin", RISCVExtensionVersion{1, 0}},
+ {"zhinx", {1, 0}},
+ {"zhinxmin", {1, 0}},
- {"zicbom", RISCVExtensionVersion{1, 0}},
- {"zicbop", RISCVExtensionVersion{1, 0}},
- {"zicboz", RISCVExtensionVersion{1, 0}},
- {"zicntr", RISCVExtensionVersion{2, 0}},
- {"zicsr", RISCVExtensionVersion{2, 0}},
- {"zifencei", RISCVExtensionVersion{2, 0}},
- {"zihintntl", RISCVExtensionVersion{1, 0}},
- {"zihintpause", RISCVExtensionVersion{2, 0}},
- {"zihpm", RISCVExtensionVersion{2, 0}},
+ {"zicbom", {1, 0}},
+ {"zicbop", {1, 0}},
+ {"zicboz", {1, 0}},
+ {"zicntr", {2, 0}},
+ {"zicsr", {2, 0}},
+ {"zifencei", {2, 0}},
+ {"zihintntl", {1, 0}},
+ {"zihintpause", {2, 0}},
+ {"zihpm", {2, 0}},
- {"zk", RISCVExtensionVersion{1, 0}},
- {"zkn", RISCVExtensionVersion{1, 0}},
- {"zknd", RISCVExtensionVersion{1, 0}},
- {"zkne", RISCVExtensionVersion{1, 0}},
- {"zknh", RISCVExtensionVersion{1, 0}},
- {"zkr", RISCVExtensionVersion{1, 0}},
- {"zks", RISCVExtensionVersion{1, 0}},
- {"zksed", RISCVExtensionVersion{1, 0}},
- {"zksh", RISCVExtensionVersion{1, 0}},
- {"zkt", RISCVExtensionVersion{1, 0}},
+ {"zk", {1, 0}},
+ {"zkn", {1, 0}},
+ {"zknd", {1, 0}},
+ {"zkne", {1, 0}},
+ {"zknh", {1, 0}},
+ {"zkr", {1, 0}},
+ {"zks", {1, 0}},
+ {"zksed", {1, 0}},
+ {"zksh", {1, 0}},
+ {"zkt", {1, 0}},
- {"zmmul", RISCVExtensionVersion{1, 0}},
+ {"zmmul", {1, 0}},
- {"zvbb", RISCVExtensionVersion{1, 0}},
- {"zvbc", RISCVExtensionVersion{1, 0}},
+ {"zvbb", {1, 0}},
+ {"zvbc", {1, 0}},
- {"zve32f", RISCVExtensionVersion{1, 0}},
- {"zve32x", RISCVExtensionVersion{1, 0}},
- {"zve64d", RISCVExtensionVersion{1, 0}},
- {"zve64f", RISCVExtensionVersion{1, 0}},
- {"zve64x", RISCVExtensionVersion{1, 0}},
+ {"zve32f", {1, 0}},
+ {"zve32x", {1, 0}},
+ {"zve64d", {1, 0}},
+ {"zve64f", {1, 0}},
+ {"zve64x", {1, 0}},
- {"zvfh", RISCVExtensionVersion{1, 0}},
- {"zvfhmin", RISCVExtensionVersion{1, 0}},
+ {"zvfh", {1, 0}},
+ {"zvfhmin", {1, 0}},
// vector crypto
- {"zvkb", RISCVExtensionVersion{1, 0}},
- {"zvkg", RISCVExtensionVersion{1, 0}},
- {"zvkn", RISCVExtensionVersion{1, 0}},
- {"zvknc", RISCVExtensionVersion{1, 0}},
- {"zvkned", RISCVExtensionVersion{1, 0}},
- {"zvkng", RISCVExtensionVersion{1, 0}},
- {"zvknha", RISCVExtensionVersion{1, 0}},
- {"zvknhb", RISCVExtensionVersion{1, 0}},
- {"zvks", RISCVExtensionVersion{1, 0}},
- {"zvksc", RISCVExtensionVersion{1, 0}},
- {"zvksed", RISCVExtensionVersion{1, 0}},
- {"zvksg", RISCVExtensionVersion{1, 0}},
- {"zvksh", RISCVExtensionVersion{1, 0}},
- {"zvkt", RISCVExtensionVersion{1, 0}},
+ {"zvkb", {1, 0}},
+ {"zvkg", {1, 0}},
+ {"zvkn", {1, 0}},
+ {"zvknc", {1, 0}},
+ {"zvkned", {1, 0}},
+ {"zvkng", {1, 0}},
+ {"zvknha", {1, 0}},
+ {"zvknhb", {1, 0}},
+ {"zvks", {1, 0}},
+ {"zvksc", {1, 0}},
+ {"zvksed", {1, 0}},
+ {"zvksg", {1, 0}},
+ {"zvksh", {1, 0}},
+ {"zvkt", {1, 0}},
- {"zvl1024b", RISCVExtensionVersion{1, 0}},
- {"zvl128b", RISCVExtensionVersion{1, 0}},
- {"zvl16384b", RISCVExtensionVersion{1, 0}},
- {"zvl2048b", RISCVExtensionVersion{1, 0}},
- {"zvl256b", RISCVExtensionVersion{1, 0}},
- {"zvl32768b", RISCVExtensionVersion{1, 0}},
- {"zvl32b", RISCVExtensionVersion{1, 0}},
- {"zvl4096b", RISCVExtensionVersion{1, 0}},
- {"zvl512b", RISCVExtensionVersion{1, 0}},
- {"zvl64b", RISCVExtensionVersion{1, 0}},
- {"zvl65536b", RISCVExtensionVersion{1, 0}},
- {"zvl8192b", RISCVExtensionVersion{1, 0}},
+ {"zvl1024b", {1, 0}},
+ {"zvl128b", {1, 0}},
+ {"zvl16384b", {1, 0}},
+ {"zvl2048b", {1, 0}},
+ {"zvl256b", {1, 0}},
+ {"zvl32768b", {1, 0}},
+ {"zvl32b", {1, 0}},
+ {"zvl4096b", {1, 0}},
+ {"zvl512b", {1, 0}},
+ {"zvl64b", {1, 0}},
+ {"zvl65536b", {1, 0}},
+ {"zvl8192b", {1, 0}},
};
// NOTE: This table should be sorted alphabetically by extension name.
static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
- {"zacas", RISCVExtensionVersion{1, 0}},
+ {"zacas", {1, 0}},
- {"zcmop", RISCVExtensionVersion{0, 2}},
+ {"zcmop", {0, 2}},
- {"zfbfmin", RISCVExtensionVersion{0, 8}},
+ {"zfbfmin", {0, 8}},
- {"zicfilp", RISCVExtensionVersion{0, 4}},
- {"zicfiss", RISCVExtensionVersion{0, 4}},
+ {"zicfilp", {0, 4}},
+ {"zicfiss", {0, 4}},
- {"zicond", RISCVExtensionVersion{1, 0}},
+ {"zicond", {1, 0}},
- {"zimop", RISCVExtensionVersion{0, 1}},
+ {"zimop", {0, 1}},
- {"ztso", RISCVExtensionVersion{0, 1}},
+ {"ztso", {0, 1}},
- {"zvfbfmin", RISCVExtensionVersion{0, 8}},
- {"zvfbfwma", RISCVExtensionVersion{0, 8}},
+ {"zvfbfmin", {0, 8}},
+ {"zvfbfwma", {0, 8}},
};
static void verifyTables() {
@@ -237,8 +232,8 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
for (const auto &E : SupportedExtensions)
ExtMap[E.Name] = {E.Version.Major, E.Version.Minor};
for (const auto &E : ExtMap) {
- std::string Version = std::to_string(E.second.MajorVersion) + "." +
- std::to_string(E.second.MinorVersion);
+ std::string Version =
+ std::to_string(E.second.Major) + "." + std::to_string(E.second.Minor);
PrintExtension(E.first, Version, DescMap[E.first]);
}
@@ -247,8 +242,8 @@ void llvm::riscvExtensionsHelp(StringMap<StringRef> DescMap) {
for (const auto &E : SupportedExperimentalExtensions)
ExtMap[E.Name] = {E.Version.Major, E.Version.Minor};
for (const auto &E : ExtMap) {
- std::string Version = std::to_string(E.second.MajorVersion) + "." +
- std::to_string(E.second.MinorVersion);
+ std::string Version =
+ std::to_string(E.second.Major) + "." + std::to_string(E.second.Minor);
PrintExtension(E.first, Version, DescMap["experimental-" + E.first]);
}
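The terser table entries above rely on aggregate initialization of the now-public version struct; schematically (assumed shape, matching the Major/Minor uses in this file):

    struct ExtensionVersion {
      unsigned Major;
      unsigned Minor;
    };
    // So {"zba", {1, 0}} is shorthand for {"zba", ExtensionVersion{1, 0}}.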
@@ -293,7 +288,7 @@ struct LessExtName {
};
} // namespace
-static std::optional<RISCVExtensionVersion>
+static std::optional<RISCVISAInfo::ExtensionVersion>
findDefaultVersion(StringRef ExtName) {
// Find default version of an extension.
// TODO: We might set default version based on profile or ISA spec.
@@ -309,12 +304,9 @@ findDefaultVersion(StringRef ExtName) {
return std::nullopt;
}
-void RISCVISAInfo::addExtension(StringRef ExtName, unsigned MajorVersion,
- unsigned MinorVersion) {
- RISCVExtensionInfo Ext;
- Ext.MajorVersion = MajorVersion;
- Ext.MinorVersion = MinorVersion;
- Exts[ExtName.str()] = Ext;
+void RISCVISAInfo::addExtension(StringRef ExtName,
+ RISCVISAInfo::ExtensionVersion Version) {
+ Exts[ExtName.str()] = Version;
}
static StringRef getExtensionTypeDesc(StringRef Ext) {
@@ -337,7 +329,7 @@ static StringRef getExtensionType(StringRef Ext) {
return StringRef();
}
-static std::optional<RISCVExtensionVersion>
+static std::optional<RISCVISAInfo::ExtensionVersion>
isExperimentalExtension(StringRef Ext) {
auto I =
llvm::lower_bound(SupportedExperimentalExtensions, Ext, LessExtName());
@@ -634,8 +626,7 @@ RISCVISAInfo::parseFeatures(unsigned XLen,
continue;
if (Add)
- ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version.Major,
- ExtensionInfoIterator->Version.Minor);
+ ISAInfo->addExtension(ExtName, ExtensionInfoIterator->Version);
else
ISAInfo->Exts.erase(ExtName.str());
}
@@ -696,7 +687,7 @@ RISCVISAInfo::parseNormalizedArchString(StringRef Arch) {
if (MajorVersionStr.getAsInteger(10, MajorVersion))
return createStringError(errc::invalid_argument,
"failed to parse major version number");
- ISAInfo->addExtension(ExtName, MajorVersion, MinorVersion);
+ ISAInfo->addExtension(ExtName, {MajorVersion, MinorVersion});
}
ISAInfo->updateFLen();
ISAInfo->updateMinVLen();
@@ -775,7 +766,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
// ISA spec.
for (const auto *Ext : RISCVGImplications) {
if (auto Version = findDefaultVersion(Ext))
- ISAInfo->addExtension(Ext, Version->Major, Version->Minor);
+ ISAInfo->addExtension(Ext, *Version);
else
llvm_unreachable("Default extension version not found?");
}
@@ -794,7 +785,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
Minor = Version->Minor;
}
- ISAInfo->addExtension(StringRef(&Baseline, 1), Major, Minor);
+ ISAInfo->addExtension(StringRef(&Baseline, 1), {Major, Minor});
}
// Consume the base ISA version number and any '_' between rvxxx and the
@@ -860,7 +851,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
"unsupported standard user-level extension '%c'",
C);
}
- ISAInfo->addExtension(StringRef(&C, 1), Major, Minor);
+ ISAInfo->addExtension(StringRef(&C, 1), {Major, Minor});
// Consume full extension name and version, including any optional '_'
// between this extension and the next
@@ -928,7 +919,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension,
if (IgnoreUnknown && !isSupportedExtension(Name))
continue;
- ISAInfo->addExtension(Name, Major, Minor);
+ ISAInfo->addExtension(Name, {Major, Minor});
// Extension format is correct, keep parsing the extensions.
// TODO: Save Type, Name, Major, Minor to avoid parsing them later.
AllExts.push_back(Name);
@@ -1143,7 +1134,7 @@ void RISCVISAInfo::updateImplication() {
// implied
if (!HasE && !HasI) {
auto Version = findDefaultVersion("i");
- addExtension("i", Version->Major, Version->Minor);
+ addExtension("i", Version.value());
}
assert(llvm::is_sorted(ImpliedExts) && "Table not sorted by Name");
@@ -1164,7 +1155,7 @@ void RISCVISAInfo::updateImplication() {
if (Exts.count(ImpliedExt))
continue;
auto Version = findDefaultVersion(ImpliedExt);
- addExtension(ImpliedExt, Version->Major, Version->Minor);
+ addExtension(ImpliedExt, Version.value());
WorkList.insert(ImpliedExt);
}
}
@@ -1174,7 +1165,7 @@ void RISCVISAInfo::updateImplication() {
if (XLen == 32 && Exts.count("zce") && Exts.count("f") &&
!Exts.count("zcf")) {
auto Version = findDefaultVersion("zcf");
- addExtension("zcf", Version->Major, Version->Minor);
+ addExtension("zcf", Version.value());
}
}
@@ -1209,7 +1200,7 @@ void RISCVISAInfo::updateCombination() {
IsAllRequiredFeatureExist &= hasExtension(Ext);
if (IsAllRequiredFeatureExist) {
auto Version = findDefaultVersion(CombineExt);
- addExtension(CombineExt, Version->Major, Version->Minor);
+ addExtension(CombineExt, Version.value());
IsNewCombine = true;
}
}
@@ -1266,7 +1257,7 @@ std::string RISCVISAInfo::toString() const {
StringRef ExtName = Ext.first;
auto ExtInfo = Ext.second;
Arch << LS << ExtName;
- Arch << ExtInfo.MajorVersion << "p" << ExtInfo.MinorVersion;
+ Arch << ExtInfo.Major << "p" << ExtInfo.Minor;
}
return Arch.str();
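The RISCVISAInfo changes above replace the parallel MajorVersion/MinorVersion fields with a single ExtensionVersion struct, so every call site can brace-initialize the version in place. A minimal standalone sketch of the pattern, using hypothetical stand-in names and only the standard library:

    #include <iostream>
    #include <map>
    #include <string>

    // Stand-in for RISCVISAInfo::ExtensionVersion: a plain aggregate,
    // so callers can write addExtension("zicond", {1, 0}).
    struct ExtensionVersion {
      unsigned Major;
      unsigned Minor;
    };

    static std::map<std::string, ExtensionVersion> Exts;

    // One struct parameter instead of two unsigned parameters.
    static void addExtension(const std::string &Name, ExtensionVersion V) {
      Exts[Name] = V;
    }

    int main() {
      addExtension("zicond", {1, 0});
      const ExtensionVersion &V = Exts["zicond"];
      std::cout << "zicond" << V.Major << "p" << V.Minor << "\n"; // zicond1p0
    }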
diff --git a/llvm/lib/TableGen/Record.cpp b/llvm/lib/TableGen/Record.cpp
index aa981fdab4b3..2b3e8a0c7f84 100644
--- a/llvm/lib/TableGen/Record.cpp
+++ b/llvm/lib/TableGen/Record.cpp
@@ -923,15 +923,16 @@ Init *UnOpInit::Fold(Record *CurRec, bool IsFinal) const {
case GETDAGOP:
if (DagInit *Dag = dyn_cast<DagInit>(LHS)) {
- DefInit *DI = DefInit::get(Dag->getOperatorAsDef({}));
- if (!DI->getType()->typeIsA(getType())) {
+ // TI is not necessarily a def due to the late resolution in multiclasses,
+ // but has to be a TypedInit.
+ auto *TI = cast<TypedInit>(Dag->getOperator());
+ if (!TI->getType()->typeIsA(getType())) {
PrintFatalError(CurRec->getLoc(),
- Twine("Expected type '") +
- getType()->getAsString() + "', got '" +
- DI->getType()->getAsString() + "' in: " +
- getAsString() + "\n");
+ Twine("Expected type '") + getType()->getAsString() +
+ "', got '" + TI->getType()->getAsString() +
+ "' in: " + getAsString() + "\n");
} else {
- return DI;
+ return Dag->getOperator();
}
}
break;
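The Record.cpp fix relaxes !getdagop from requiring a DefInit to requiring only a TypedInit, because late resolution inside multiclasses can leave the DAG operator not yet resolved to a def. A standalone sketch of why the weaker cast is the right guarantee, with a toy hierarchy and dynamic_cast standing in for llvm::cast:

    #include <cassert>
    #include <iostream>

    // Toy hierarchy mirroring Init -> TypedInit -> DefInit.
    struct Init { virtual ~Init() = default; };
    struct TypedInit : Init {
      virtual const char *kind() const { return "typed"; }
    };
    struct DefInit : TypedInit {
      const char *kind() const override { return "def"; }
    };

    int main() {
      // During multiclass instantiation the operator may not be a
      // DefInit yet, but it always carries a type, so casting to
      // TypedInit never fails where casting to DefInit could.
      Init *Op = new DefInit();
      auto *TI = dynamic_cast<TypedInit *>(Op);
      assert(TI && "DAG operator must be a TypedInit");
      std::cout << TI->kind() << "\n"; // def
      delete Op;
    }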
diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 90e1ce9ddf66..7d2ff146a340 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -256,6 +256,11 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) {
if (BTE->getZExtValue())
Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI;
+ if (const auto *GCS = mdconst::extract_or_null<ConstantInt>(
+ M.getModuleFlag("guarded-control-stack")))
+ if (GCS->getZExtValue())
+ Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_GCS;
+
if (const auto *Sign = mdconst::extract_or_null<ConstantInt>(
M.getModuleFlag("sign-return-address")))
if (Sign->getZExtValue())
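The asm-printer hunk adds a third module flag to the same accumulation pattern: each flag that is present and nonzero contributes one bit to the GNU property note. A standalone sketch of the accumulation; the bit positions here are illustrative only, the real constants being the GNU_PROPERTY_AARCH64_FEATURE_1_* enumerators in llvm/BinaryFormat/ELF.h:

    #include <cstdint>
    #include <iostream>

    // Illustrative bit positions only.
    constexpr uint32_t FEATURE_BTI = 1u << 0;
    constexpr uint32_t FEATURE_PAC = 1u << 1;
    constexpr uint32_t FEATURE_GCS = 1u << 2;

    // Mirrors the chain of extract_or_null/getZExtValue checks above.
    uint32_t buildFeatureFlags(bool BTI, bool GCS, bool SignRA) {
      uint32_t Flags = 0;
      if (BTI)    Flags |= FEATURE_BTI;
      if (GCS)    Flags |= FEATURE_GCS; // new guarded-control-stack bit
      if (SignRA) Flags |= FEATURE_PAC;
      return Flags;
    }

    int main() { std::cout << buildFeatureFlags(true, true, false) << "\n"; }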
diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index edc8cc7d4d1e..ea5679b4d5e3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -6834,10 +6834,10 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
return getPackedVectorTypeFromPredicateType(
Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
case Intrinsic::aarch64_sve_ld1udq:
- case Intrinsic::aarch64_sve_st1udq:
+ case Intrinsic::aarch64_sve_st1dq:
return EVT(MVT::nxv1i64);
case Intrinsic::aarch64_sve_ld1uwq:
- case Intrinsic::aarch64_sve_st1uwq:
+ case Intrinsic::aarch64_sve_st1wq:
return EVT(MVT::nxv1i32);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 47e665176e8b..e2d07a096496 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4513,8 +4513,7 @@ static SDValue skipExtensionForVectorMULL(SDValue N, SelectionDAG &DAG) {
SDLoc dl(N);
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i != NumElts; ++i) {
- ConstantSDNode *C = cast<ConstantSDNode>(N.getOperand(i));
- const APInt &CInt = C->getAPIntValue();
+ const APInt &CInt = N.getConstantOperandAPInt(i);
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
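getConstantOperandAPInt(i) folds the cast-to-ConstantSDNode and getAPIntValue() steps into one accessor, so the change above is purely a cleanup. A toy sketch of the shape of that accessor, with stand-in types:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Toy node: fetch operand i and assert it is a constant, in one
    // call instead of a cast followed by a value read.
    struct ToyNode {
      std::vector<int64_t> Ops; // pretend every operand is a constant
      int64_t getConstantOperand(unsigned i) const {
        assert(i < Ops.size() && "operand out of range");
        return Ops[i];
      }
    };

    int main() {
      ToyNode N{{7, 11, 13}};
      return N.getConstantOperand(2) == 13 ? 0 : 1;
    }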
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 1cfbf4737a6f..42b7a6418032 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4214,6 +4214,9 @@ static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
switch (FirstOpc) {
default:
return false;
+ case AArch64::LDRQui:
+ case AArch64::LDURQi:
+ return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
case AArch64::LDRWui:
case AArch64::LDURWi:
return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
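The canPairLdStOpc addition lets 128-bit loads pair across the scaled and unscaled addressing forms. A standalone sketch of the new case, with a toy opcode enum:

    #include <iostream>

    enum Opc { LDRQui, LDURQi, Other };

    // Scaled (LDRQui) and unscaled (LDURQi) Q-register loads may now
    // pair with either form of the same width.
    bool canPairQ(Opc First, Opc Second) {
      if (First == LDRQui || First == LDURQi)
        return Second == LDRQui || Second == LDURQi;
      return false;
    }

    int main() { std::cout << canPairQ(LDRQui, LDURQi) << "\n"; } // 1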
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index b435b3ce03e7..e90b8a8ca7ac 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -1326,10 +1326,14 @@ static int alignTo(int Num, int PowOf2) {
static bool mayAlias(MachineInstr &MIa,
SmallVectorImpl<MachineInstr *> &MemInsns,
AliasAnalysis *AA) {
- for (MachineInstr *MIb : MemInsns)
- if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false))
+ for (MachineInstr *MIb : MemInsns) {
+ if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) {
+ LLVM_DEBUG(dbgs() << "Aliasing with: "; MIb->dump());
return true;
+ }
+ }
+ LLVM_DEBUG(dbgs() << "No aliases found\n");
return false;
}
@@ -1757,9 +1761,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// Remember any instructions that read/write memory between FirstMI and MI.
SmallVector<MachineInstr *, 4> MemInsns;
+ LLVM_DEBUG(dbgs() << "Find match for: "; FirstMI.dump());
for (unsigned Count = 0; MBBI != E && Count < Limit;
MBBI = next_nodbg(MBBI, E)) {
MachineInstr &MI = *MBBI;
+ LLVM_DEBUG(dbgs() << "Analysing 2nd insn: "; MI.dump());
UsedInBetween.accumulate(MI);
@@ -1859,6 +1865,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs() << "Offset doesn't fit in immediate, "
+ << "keep looking.\n");
continue;
}
// If the alignment requirements of the paired (scaled) instruction
@@ -1868,6 +1876,9 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs()
+ << "Offset doesn't fit due to alignment requirements, "
+ << "keep looking.\n");
continue;
}
}
@@ -1884,14 +1895,22 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
const bool SameLoadReg = MayLoad && TRI->isSuperOrSubRegisterEq(
Reg, getLdStRegOp(MI).getReg());
- // If the Rt of the second instruction was not modified or used between
- // the two instructions and none of the instructions between the second
- // and first alias with the second, we can combine the second into the
- // first.
- if (ModifiedRegUnits.available(getLdStRegOp(MI).getReg()) &&
- !(MI.mayLoad() && !SameLoadReg &&
- !UsedRegUnits.available(getLdStRegOp(MI).getReg())) &&
- !mayAlias(MI, MemInsns, AA)) {
+ // If the Rt of the second instruction (destination register of the
+ // load) was not modified or used between the two instructions and none
+ // of the instructions between the second and first alias with the
+ // second, we can combine the second into the first.
+ bool RtNotModified =
+ ModifiedRegUnits.available(getLdStRegOp(MI).getReg());
+ bool RtNotUsed = !(MI.mayLoad() && !SameLoadReg &&
+ !UsedRegUnits.available(getLdStRegOp(MI).getReg()));
+
+ LLVM_DEBUG(dbgs() << "Checking, can combine 2nd into 1st insn:\n"
+ << "Reg '" << getLdStRegOp(MI) << "' not modified: "
+ << (RtNotModified ? "true" : "false") << "\n"
+ << "Reg '" << getLdStRegOp(MI) << "' not used: "
+ << (RtNotUsed ? "true" : "false") << "\n");
+
+ if (RtNotModified && RtNotUsed && !mayAlias(MI, MemInsns, AA)) {
// For pairs loading into the same reg, try to find a renaming
// opportunity to allow the renaming of Reg between FirstMI and MI
// and combine MI into FirstMI; otherwise bail and keep looking.
@@ -1904,6 +1923,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits,
UsedRegUnits, TRI);
MemInsns.push_back(&MI);
+ LLVM_DEBUG(dbgs() << "Can't find reg for renaming, "
+ << "keep looking.\n");
continue;
}
Flags.setRenameReg(*RenameReg);
@@ -1919,10 +1940,15 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// between the two instructions and none of the instructions between the
// first and the second alias with the first, we can combine the first
// into the second.
- if (!(MayLoad &&
- !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg())) &&
- !mayAlias(FirstMI, MemInsns, AA)) {
+ RtNotModified = !(
+ MayLoad && !UsedRegUnits.available(getLdStRegOp(FirstMI).getReg()));
+
+ LLVM_DEBUG(dbgs() << "Checking, can combine 1st into 2nd insn:\n"
+ << "Reg '" << getLdStRegOp(FirstMI)
+ << "' not modified: "
+ << (RtNotModified ? "true" : "false") << "\n");
+ if (RtNotModified && !mayAlias(FirstMI, MemInsns, AA)) {
if (ModifiedRegUnits.available(getLdStRegOp(FirstMI).getReg())) {
Flags.setMergeForward(true);
Flags.clearRenameReg();
@@ -1938,8 +1964,8 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
MBBIWithRenameReg = MBBI;
}
}
- // Unable to combine these instructions due to interference in between.
- // Keep looking.
+ LLVM_DEBUG(dbgs() << "Unable to combine these instructions due to "
+ << "interference in between, keep looking.\n");
}
}
@@ -1948,16 +1974,20 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
// The instruction wasn't a matching load or store. Stop searching if we
// encounter a call instruction that might modify memory.
- if (MI.isCall())
+ if (MI.isCall()) {
+ LLVM_DEBUG(dbgs() << "Found a call, stop looking.\n");
return E;
+ }
// Update modified / uses register units.
LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI);
// Otherwise, if the base register is modified, we have no match, so
// return early.
- if (!ModifiedRegUnits.available(BaseReg))
+ if (!ModifiedRegUnits.available(BaseReg)) {
+ LLVM_DEBUG(dbgs() << "Base reg is modified, stop looking.\n");
return E;
+ }
// Update list of instructions that read/write memory.
if (MI.mayLoadOrStore())
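All of the tracing added above follows the standard LLVM_DEBUG idiom: the statements compile away entirely in release (NDEBUG) builds and print only under -debug-only=<DEBUG_TYPE> in asserts builds. A minimal sketch of the pattern, assuming the LLVM headers are on the include path:

    #define DEBUG_TYPE "aarch64-ldst-opt"
    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    static bool checkPair(bool RtNotModified, bool RtNotUsed) {
      // Printed only with: llc -debug-only=aarch64-ldst-opt ...
      LLVM_DEBUG(dbgs() << "Reg not modified: "
                        << (RtNotModified ? "true" : "false")
                        << ", not used: "
                        << (RtNotUsed ? "true" : "false") << "\n");
      return RtNotModified && RtNotUsed;
    }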
diff --git a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
index 6fcd9c290e9c..6c6cd120b035 100644
--- a/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoopIdiomTransform.cpp
@@ -53,7 +53,7 @@ using namespace PatternMatch;
#define DEBUG_TYPE "aarch64-loop-idiom-transform"
static cl::opt<bool>
- DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(true),
+ DisableAll("disable-aarch64-lit-all", cl::Hidden, cl::init(false),
cl::desc("Disable AArch64 Loop Idiom Transform Pass."));
static cl::opt<bool> DisableByteCmp(
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index ee10a7d1c706..4782ad076c60 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1397,17 +1397,17 @@ let Predicates = [HasSVEorSME] in {
(RegImmInst Z_q:$Zt, PPR3bAny:$Pg, GPR64sp:$base, (i64 0))>;
}
- // ld1quw/st1quw
+ // ld1quw/st1qw
defm : sve_ld1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
defm : sve_ld1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_ld1uwq, LD1W_Q, LD1W_Q_IMM, am_sve_regreg_lsl2>;
- defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
- defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1uwq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_st1q_pat<nxv4i32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
+ defm : sve_st1q_pat<nxv4f32, nxv1i1, int_aarch64_sve_st1wq, ST1W_Q, ST1W_Q_IMM, am_sve_regreg_lsl2>;
- // ld1qud/st1qud
+ // ld1qud/st1qd
defm : sve_ld1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
defm : sve_ld1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_ld1udq, LD1D_Q, LD1D_Q_IMM, am_sve_regreg_lsl3>;
- defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
- defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1udq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_st1q_pat<nxv2i64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
+ defm : sve_st1q_pat<nxv2f64, nxv1i1, int_aarch64_sve_st1dq, ST1D_Q, ST1D_Q_IMM, am_sve_regreg_lsl3>;
} // End HasSVEorSME
@@ -4006,7 +4006,9 @@ defm WHILEHS_CXX : sve2p1_int_while_rr_pn<"whilehs", 0b100>;
defm WHILEHI_CXX : sve2p1_int_while_rr_pn<"whilehi", 0b101>;
defm WHILELO_CXX : sve2p1_int_while_rr_pn<"whilelo", 0b110>;
defm WHILELS_CXX : sve2p1_int_while_rr_pn<"whilels", 0b111>;
+} // End HasSVE2p1_or_HasSME2
+let Predicates = [HasSVEorSME] in {
// Aliases for existing SVE instructions for which predicate-as-counter are
// accepted as an operand to the instruction
@@ -4025,7 +4027,7 @@ def : InstAlias<"mov $Pd, $Pn",
def : InstAlias<"pfalse\t$Pd", (PFALSE PNRasPPR8:$Pd), 0>;
-} // End HasSVE2p1_or_HasSME2
+}
//===----------------------------------------------------------------------===//
// Non-widening BFloat16 to BFloat16 instructions
@@ -4095,7 +4097,7 @@ defm FMAXQV : sve2p1_fp_reduction_q<0b110, "fmaxqv", int_aarch64_sve_fmaxqv>;
defm FMINQV : sve2p1_fp_reduction_q<0b111, "fminqv", int_aarch64_sve_fminqv>;
defm DUPQ_ZZI : sve2p1_dupq<"dupq">;
-defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq_lane>;
+defm EXTQ_ZZI : sve2p1_extq<"extq", int_aarch64_sve_extq>;
defm PMOV_PZI : sve2p1_vector_to_pred<"pmov", int_aarch64_sve_pmov_to_pred_lane, int_aarch64_sve_pmov_to_pred_lane_zero>;
defm PMOV_ZIP : sve2p1_pred_to_vector<"pmov", int_aarch64_sve_pmov_to_vector_lane_merging, int_aarch64_sve_pmov_to_vector_lane_zeroing>;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b5b8b6829178..13b5e578391d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1406,9 +1406,23 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
return &II;
}
+// Simplify operations where the predicate has all lanes inactive, or try to
+// replace with the _u form when all lanes are active.
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
+ Intrinsic::ID IID) {
+ if (match(II.getOperand(0), m_ZeroInt())) {
+ // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
+ // inactive for sv[func]_m
+ return IC.replaceInstUsesWith(II, II.getOperand(1));
+ }
+ return instCombineSVEAllActive(II, IID);
+}
+
static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
return II_U;
if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
Intrinsic::aarch64_sve_mla>(
@@ -1423,7 +1437,8 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
static std::optional<Instruction *>
instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
return II_U;
if (auto FMLA =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1465,7 +1480,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
static std::optional<Instruction *>
instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
return II_U;
if (auto FMLS =
instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1507,7 +1523,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
IntrinsicInst &II) {
- if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
return II_U;
if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
Intrinsic::aarch64_sve_mls>(
@@ -1523,11 +1540,6 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
auto *OpMultiplicand = II.getOperand(1);
auto *OpMultiplier = II.getOperand(2);
- // Canonicalise a non _u intrinsic only.
- if (II.getIntrinsicID() != IID)
- if (auto II_U = instCombineSVEAllActive(II, IID))
- return II_U;
-
// Return true if a given instruction is a unit splat value, false otherwise.
auto IsUnitSplat = [](auto *I) {
auto *SplatValue = getSplatValue(I);
@@ -1891,34 +1903,38 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
case Intrinsic::aarch64_sve_ptest_last:
return instCombineSVEPTest(IC, II);
case Intrinsic::aarch64_sve_fabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
case Intrinsic::aarch64_sve_fadd:
return instCombineSVEVectorFAdd(IC, II);
case Intrinsic::aarch64_sve_fadd_u:
return instCombineSVEVectorFAddU(IC, II);
case Intrinsic::aarch64_sve_fdiv:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
case Intrinsic::aarch64_sve_fmax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
case Intrinsic::aarch64_sve_fmaxnm:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
case Intrinsic::aarch64_sve_fmin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
case Intrinsic::aarch64_sve_fminnm:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
case Intrinsic::aarch64_sve_fmla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
case Intrinsic::aarch64_sve_fmls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
case Intrinsic::aarch64_sve_fmul:
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmul_u))
+ return II_U;
+ return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
case Intrinsic::aarch64_sve_fmul_u:
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
case Intrinsic::aarch64_sve_fmulx:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
case Intrinsic::aarch64_sve_fnmla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
case Intrinsic::aarch64_sve_fnmls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
case Intrinsic::aarch64_sve_fsub:
return instCombineSVEVectorFSub(IC, II);
case Intrinsic::aarch64_sve_fsub_u:
@@ -1930,20 +1946,24 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
Intrinsic::aarch64_sve_mla_u>(
IC, II, true);
case Intrinsic::aarch64_sve_mla:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
case Intrinsic::aarch64_sve_mls:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
case Intrinsic::aarch64_sve_mul:
+ if (auto II_U =
+ instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mul_u))
+ return II_U;
+ return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
case Intrinsic::aarch64_sve_mul_u:
return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
case Intrinsic::aarch64_sve_sabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
case Intrinsic::aarch64_sve_smax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
case Intrinsic::aarch64_sve_smin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
case Intrinsic::aarch64_sve_smulh:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
case Intrinsic::aarch64_sve_sub:
return instCombineSVEVectorSub(IC, II);
case Intrinsic::aarch64_sve_sub_u:
@@ -1951,31 +1971,31 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
Intrinsic::aarch64_sve_mls_u>(
IC, II, true);
case Intrinsic::aarch64_sve_uabd:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
case Intrinsic::aarch64_sve_umax:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
case Intrinsic::aarch64_sve_umin:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
case Intrinsic::aarch64_sve_umulh:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u);
case Intrinsic::aarch64_sve_asr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u);
case Intrinsic::aarch64_sve_lsl:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u);
case Intrinsic::aarch64_sve_lsr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u);
case Intrinsic::aarch64_sve_and:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u);
case Intrinsic::aarch64_sve_bic:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u);
case Intrinsic::aarch64_sve_eor:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u);
case Intrinsic::aarch64_sve_orr:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u);
case Intrinsic::aarch64_sve_sqsub:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u);
case Intrinsic::aarch64_sve_uqsub:
- return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
+ return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u);
case Intrinsic::aarch64_sve_tbl:
return instCombineSVETBL(IC, II);
case Intrinsic::aarch64_sve_uunpkhi:
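instCombineSVEAllOrNoActive adds one new fold in front of the existing all-active canonicalization: a merging (_m) SVE intrinsic whose governing predicate is constant all-false leaves every lane of the passthru untouched, so the call folds to operand 1. The core test, as a sketch using LLVM's PatternMatch and assuming, as in the helper above, that operand 0 is the predicate and operand 1 the passthru:

    #include "llvm/IR/IntrinsicInst.h"
    #include "llvm/IR/PatternMatch.h"

    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns the value the intrinsic folds to, or nullptr if the
    // predicate is not known to be all-false.
    static Value *foldAllInactive(IntrinsicInst &II) {
      if (match(II.getOperand(0), m_ZeroInt()))
        return II.getOperand(1); // every lane keeps the passthru value
      return nullptr;
    }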
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index b657a0954d78..302116447efc 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1166,7 +1166,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
getActionDefinitionsBuilder(G_FMAD).lower();
// Access to floating-point environment.
- getActionDefinitionsBuilder({G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
+ getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV, G_RESET_FPENV,
+ G_GET_FPMODE, G_SET_FPMODE, G_RESET_FPMODE})
.libcall();
getActionDefinitionsBuilder(G_IS_FPCLASS).lower();
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 496ab18e9b19..6e074b6a63c4 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -120,7 +120,8 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
assert((!Target.getSymA() ||
Target.getSymA()->getKind() == MCSymbolRefExpr::VK_None ||
- Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT) &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_PLT ||
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL) &&
"Should only be expression-level modifiers here");
assert((!Target.getSymB() ||
@@ -206,7 +207,10 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
case FK_Data_2:
return R_CLS(ABS16);
case FK_Data_4:
- return R_CLS(ABS32);
+ return (!IsILP32 &&
+ Target.getAccessVariant() == MCSymbolRefExpr::VK_GOTPCREL)
+ ? ELF::R_AARCH64_GOTPCREL32
+ : R_CLS(ABS32);
case FK_Data_8:
if (IsILP32) {
Ctx.reportError(Fixup.getLoc(),
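The object-writer hunk teaches FK_Data_4 fixups to select the new GOT-relative relocation when the symbol reference carries the @GOTPCREL variant, but only for LP64. A standalone sketch of the selection; the enumerators are illustrative stand-ins for the real MCFixup kinds and the relocations in llvm/BinaryFormat/ELFRelocs/AArch64.def:

    enum Fixup { FK_Data_4, FK_Data_8 };
    enum Variant { VK_None, VK_GOTPCREL };
    enum Reloc { R_AARCH64_ABS32, R_AARCH64_GOTPCREL32 };

    // A 4-byte data fixup against foo@GOTPCREL now selects the
    // GOT-relative relocation instead of plain ABS32 (LP64 only).
    Reloc pick(Fixup F, Variant V, bool IsILP32) {
      if (F == FK_Data_4)
        return (!IsILP32 && V == VK_GOTPCREL) ? R_AARCH64_GOTPCREL32
                                              : R_AARCH64_ABS32;
      return R_AARCH64_ABS32; // elided: other fixup kinds
    }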
diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td
index 70f3c2c99f0f..44d9a8ac7cb6 100644
--- a/llvm/lib/Target/AArch64/SMEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td
@@ -1268,7 +1268,7 @@ multiclass sve2_int_perm_revd<string asm, SDPatternOperator op> {
}
class sve2_clamp<string asm, bits<2> sz, bit U, ZPRRegOp zpr_ty>
- : I<(outs zpr_ty:$Zd), (ins zpr_ty:$Zn, zpr_ty:$Zm, zpr_ty:$_Zd),
+ : I<(outs zpr_ty:$Zd), (ins zpr_ty:$_Zd, zpr_ty:$Zn, zpr_ty:$Zm),
asm, "\t$Zd, $Zn, $Zm", "", []>,
Sched<[]> {
bits<5> Zm;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 0c77fe725958..b9411e205212 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -111,7 +111,7 @@ def smulu64 : GICombineRule<
[{ return matchCombine_s_mul_u64(*${smul}, ${matchinfo}); }]),
(apply [{ applyCombine_s_mul_u64(*${smul}, ${matchinfo}); }])>;
-def sign_exension_in_reg_matchdata : GIDefMatchData<"MachineInstr *">;
+def sign_exension_in_reg_matchdata : GIDefMatchData<"std::pair<MachineInstr *, unsigned>">;
def sign_extension_in_reg : GICombineRule<
(defs root:$sign_inreg, sign_exension_in_reg_matchdata:$matchinfo),
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 719ae2e8750c..41462d7a133e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1579,13 +1579,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
bool AMDGPUDAGToDAGISel::SelectBUFSOffset(SDValue ByteOffsetNode,
SDValue &SOffset) const {
- if (Subtarget->hasRestrictedSOffset()) {
- if (auto SOffsetConst = dyn_cast<ConstantSDNode>(ByteOffsetNode)) {
- if (SOffsetConst->isZero()) {
- SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
- return true;
- }
- }
+ if (Subtarget->hasRestrictedSOffset() && isNullConstant(ByteOffsetNode)) {
+ SOffset = CurDAG->getRegister(AMDGPU::SGPR_NULL, MVT::i32);
+ return true;
}
SOffset = ByteOffsetNode;
@@ -2483,7 +2479,7 @@ void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
SDValue PtrBase = Ptr.getOperand(0);
SDValue PtrOffset = Ptr.getOperand(1);
- const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
+ const APInt &OffsetVal = PtrOffset->getAsAPIntVal();
if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
N = glueCopyToM0(N, PtrBase);
Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
index d2a02143e4e7..5762f1906a16 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp
@@ -1026,6 +1026,51 @@ public:
return N;
}
+ /// Strip "amdgpu-no-lds-kernel-id" from any functions where we may have
+ /// introduced its use. If AMDGPUAttributor ran prior to the pass, we inferred
+ /// the lack of llvm.amdgcn.lds.kernel.id calls.
+ void removeNoLdsKernelIdFromReachable(CallGraph &CG, Function *KernelRoot) {
+ KernelRoot->removeFnAttr("amdgpu-no-lds-kernel-id");
+
+ SmallVector<Function *> Tmp({CG[KernelRoot]->getFunction()});
+ if (!Tmp.back())
+ return;
+
+ SmallPtrSet<Function *, 8> Visited;
+ bool SeenUnknownCall = false;
+
+ do {
+ Function *F = Tmp.pop_back_val();
+
+ for (auto &N : *CG[F]) {
+ if (!N.second)
+ continue;
+
+ Function *Callee = N.second->getFunction();
+ if (!Callee) {
+ if (!SeenUnknownCall) {
+ SeenUnknownCall = true;
+
+ // If we see any indirect calls, assume nothing about potential
+ // targets.
+ // TODO: This could be refined to possible LDS global users.
+ for (auto &N : *CG.getExternalCallingNode()) {
+ Function *PotentialCallee = N.second->getFunction();
+ if (!isKernelLDS(PotentialCallee))
+ PotentialCallee->removeFnAttr("amdgpu-no-lds-kernel-id");
+ }
+
+ continue;
+ }
+ }
+
+ Callee->removeFnAttr("amdgpu-no-lds-kernel-id");
+ if (Visited.insert(Callee).second)
+ Tmp.push_back(Callee);
+ }
+ } while (!Tmp.empty());
+ }
+
DenseMap<Function *, GlobalVariable *> lowerDynamicLDSVariables(
Module &M, LDSUsesInfoTy &LDSUsesInfo,
DenseSet<Function *> const &KernelsThatIndirectlyAllocateDynamicLDS,
@@ -1175,6 +1220,13 @@ public:
M, TableLookupVariablesOrdered, OrderedKernels, KernelToReplacement);
replaceUsesInInstructionsWithTableLookup(M, TableLookupVariablesOrdered,
LookupTable);
+
+ // Strip amdgpu-no-lds-kernel-id from all functions reachable from the
+ // kernel. We may have inferred this wasn't used prior to the pass.
+ //
+ // TODO: We could filter out subgraphs that do not access LDS globals.
+ for (Function *F : KernelsThatAllocateTableLDS)
+ removeNoLdsKernelIdFromReachable(CG, F);
}
DenseMap<Function *, GlobalVariable *> KernelToCreatedDynamicLDS =
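removeNoLdsKernelIdFromReachable is a plain worklist walk over the call graph: visit every function reachable from the kernel root once, stripping the attribute as it goes. A standalone sketch of the traversal shape, with a string-keyed adjacency map standing in for llvm::CallGraph:

    #include <iostream>
    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    using Graph = std::map<std::string, std::vector<std::string>>;

    void stripReachable(const Graph &CG, const std::string &Root,
                        std::set<std::string> &Stripped) {
      std::vector<std::string> Work{Root};
      std::set<std::string> Visited{Root};
      while (!Work.empty()) {
        std::string F = Work.back();
        Work.pop_back();
        Stripped.insert(F); // removeFnAttr("amdgpu-no-lds-kernel-id")
        for (const std::string &Callee : CG.at(F))
          if (Visited.insert(Callee).second) // visit each node once
            Work.push_back(Callee);
      }
    }

    int main() {
      Graph CG{{"kernel", {"helper"}}, {"helper", {"kernel"}}};
      std::set<std::string> Stripped;
      stripReachable(CG, "kernel", Stripped);
      std::cout << Stripped.size() << "\n"; // 2; the cycle is handled
    }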
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 21bfab52c6c4..bb1d6cb72e80 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -99,10 +99,10 @@ public:
// Combine unsigned buffer load and signed extension instructions to generate
// signed buffer load instructions.
- bool matchCombineSignExtendInReg(MachineInstr &MI,
- MachineInstr *&MatchInfo) const;
- void applyCombineSignExtendInReg(MachineInstr &MI,
- MachineInstr *&MatchInfo) const;
+ bool matchCombineSignExtendInReg(
+ MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
+ void applyCombineSignExtendInReg(
+ MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
// Find the s_mul_u64 instructions where the higher bits are either
// zero-extended or sign-extended.
@@ -395,34 +395,36 @@ bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
// Identify buffer_load_{u8, u16}.
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
- MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
- Register Op0Reg = MI.getOperand(1).getReg();
- SubwordBufferLoad = MRI.getVRegDef(Op0Reg);
-
- if (!MRI.hasOneNonDBGUse(Op0Reg))
+ MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
+ Register LoadReg = MI.getOperand(1).getReg();
+ if (!MRI.hasOneNonDBGUse(LoadReg))
return false;
// Check if the first operand of the sign extension is a subword buffer load
// instruction.
- return SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE ||
- SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT;
+ MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
+ int64_t Width = MI.getOperand(2).getImm();
+ switch (LoadMI->getOpcode()) {
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
+ MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
+ return Width == 8;
+ case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
+ MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
+ return Width == 16;
+ }
+ return false;
}
// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
- MachineInstr &MI, MachineInstr *&SubwordBufferLoad) const {
- // Modify the opcode and the destination of buffer_load_{u8, u16}:
- // Replace the opcode.
- unsigned Opc =
- SubwordBufferLoad->getOpcode() == AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE
- ? AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE
- : AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT;
- SubwordBufferLoad->setDesc(TII.get(Opc));
- // Update the destination register of SubwordBufferLoad with the destination
- // register of the sign extension.
+ MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
+ auto [LoadMI, NewOpcode] = MatchData;
+ LoadMI->setDesc(TII.get(NewOpcode));
+ // Update the destination register of the load with the destination register
+ // of the sign extension.
Register SignExtendInsnDst = MI.getOperand(0).getReg();
- SubwordBufferLoad->getOperand(0).setReg(SignExtendInsnDst);
+ LoadMI->getOperand(0).setReg(SignExtendInsnDst);
// Remove the sign extension.
MI.eraseFromParent();
}
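The combiner change does two things at once: the matchdata now carries both the load and the signed opcode to rewrite it to, and the match only fires when the G_SEXT_INREG width actually equals the load width, a check the old code was missing. A standalone sketch of that match step, with a toy opcode enum:

    #include <cstdint>
    #include <iostream>
    #include <utility>

    enum Opcode { LOAD_UBYTE, LOAD_USHORT, LOAD_SBYTE, LOAD_SSHORT, OTHER };

    // Record (load, new signed opcode) and require the sign-extension
    // width to match the load width (8 for byte, 16 for short).
    bool matchSExtLoad(Opcode LoadOpc, int64_t Width,
                       std::pair<Opcode, Opcode> &MatchData) {
      switch (LoadOpc) {
      case LOAD_UBYTE:
        MatchData = {LoadOpc, LOAD_SBYTE};
        return Width == 8;
      case LOAD_USHORT:
        MatchData = {LoadOpc, LOAD_SSHORT};
        return Width == 16;
      default:
        return false;
      }
    }

    int main() {
      std::pair<Opcode, Opcode> MD;
      std::cout << matchSExtLoad(LOAD_UBYTE, 8, MD)  // 1: widths agree
                << matchSExtLoad(LOAD_USHORT, 8, MD) // 0: 8-bit sext of u16
                << "\n";
    }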
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index b7f043860115..ba79affe683d 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1342,10 +1342,8 @@ private:
unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
bool ParseRegRange(unsigned& Num, unsigned& Width);
- unsigned getRegularReg(RegisterKind RegKind,
- unsigned RegNum,
- unsigned RegWidth,
- SMLoc Loc);
+ unsigned getRegularReg(RegisterKind RegKind, unsigned RegNum, unsigned SubReg,
+ unsigned RegWidth, SMLoc Loc);
bool isRegister();
bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
@@ -2616,6 +2614,8 @@ AMDGPUAsmParser::isRegister(const AsmToken &Token,
StringRef RegName = Reg->Name;
StringRef RegSuffix = Str.substr(RegName.size());
if (!RegSuffix.empty()) {
+ RegSuffix.consume_back(".l");
+ RegSuffix.consume_back(".h");
unsigned Num;
// A single register with an index: rXX
if (getRegNum(RegSuffix, Num))
@@ -2636,12 +2636,9 @@ AMDGPUAsmParser::isRegister()
return isRegister(getToken(), peekToken());
}
-unsigned
-AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
- unsigned RegNum,
- unsigned RegWidth,
- SMLoc Loc) {
-
+unsigned AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, unsigned RegNum,
+ unsigned SubReg, unsigned RegWidth,
+ SMLoc Loc) {
assert(isRegularReg(RegKind));
unsigned AlignSize = 1;
@@ -2670,7 +2667,17 @@ AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
return AMDGPU::NoRegister;
}
- return RC.getRegister(RegIdx);
+ unsigned Reg = RC.getRegister(RegIdx);
+
+ if (SubReg) {
+ Reg = TRI->getSubReg(Reg, SubReg);
+
+ // Currently all regular registers have their .l and .h subregisters, so
+ // we should never need to generate an error here.
+ assert(Reg && "Invalid subregister!");
+ }
+
+ return Reg;
}
bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) {
@@ -2748,7 +2755,17 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
RegKind = RI->Kind;
StringRef RegSuffix = RegName.substr(RI->Name.size());
+ unsigned SubReg = NoSubRegister;
if (!RegSuffix.empty()) {
+ // We don't know the opcode till we are done parsing, so we don't know if
+ // registers should be 16 or 32 bit. It is therefore mandatory to put .l or
+ // .h to correctly specify 16 bit registers. We also can't determine class
+ // VGPR_16_Lo128 or VGPR_16, so always parse them as VGPR_16.
+ if (RegSuffix.consume_back(".l"))
+ SubReg = AMDGPU::lo16;
+ else if (RegSuffix.consume_back(".h"))
+ SubReg = AMDGPU::hi16;
+
// Single 32-bit register: vXX.
if (!getRegNum(RegSuffix, RegNum)) {
Error(Loc, "invalid register index");
@@ -2761,7 +2778,7 @@ unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
return AMDGPU::NoRegister;
}
- return getRegularReg(RegKind, RegNum, RegWidth, Loc);
+ return getRegularReg(RegKind, RegNum, SubReg, RegWidth, Loc);
}
unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
@@ -2813,7 +2830,7 @@ unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
}
if (isRegularReg(RegKind))
- Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
+ Reg = getRegularReg(RegKind, RegNum, NoSubRegister, RegWidth, ListLoc);
return Reg;
}
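The .l/.h suffix handling relies on StringRef::consume_back, which strips the suffix in place and returns true only if it was present, so the remaining string is the bare register index. A minimal sketch of that split, assuming the LLVM ADT headers are available:

    #include "llvm/ADT/StringRef.h"

    using namespace llvm;

    enum SubRegKind { NoSub, Lo16, Hi16 };

    // "1.h" becomes "1" plus Hi16; a plain "1" is left untouched and
    // parses as a full 32-bit register, as in the parser above.
    SubRegKind splitSubReg(StringRef &Suffix) {
      if (Suffix.consume_back(".l"))
        return Lo16;
      if (Suffix.consume_back(".h"))
        return Hi16;
      return NoSub;
    }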
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index a7d8ff0242b8..bcd93e30d6c2 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1450,20 +1450,27 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
return false;
return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
};
- auto IsExpiredFn = [](const MachineInstr &I, int) {
+ bool LdsdirCanWait = ST.hasLdsWaitVMSRC();
+ auto IsExpiredFn = [this, LdsdirCanWait](const MachineInstr &I, int) {
return SIInstrInfo::isVALU(I) || SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT && !I.getOperand(0).getImm()) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
- AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0);
+ AMDGPU::DepCtr::decodeFieldVmVsrc(I.getOperand(0).getImm()) == 0) ||
+ (LdsdirCanWait && SIInstrInfo::isLDSDIR(I) &&
+ !TII.getNamedOperand(I, AMDGPU::OpName::waitvsrc)->getImm());
};
if (::getWaitStatesSince(IsHazardFn, MI, IsExpiredFn) ==
std::numeric_limits<int>::max())
return false;
- BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
- TII.get(AMDGPU::S_WAITCNT_DEPCTR))
- .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
+ if (LdsdirCanWait) {
+ TII.getNamedOperand(*MI, AMDGPU::OpName::waitvsrc)->setImm(0);
+ } else {
+ BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+ TII.get(AMDGPU::S_WAITCNT_DEPCTR))
+ .addImm(AMDGPU::DepCtr::encodeFieldVmVsrc(0));
+ }
return true;
}
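On targets where LDSDIR can itself wait on VMEM sources (GFX12 and later, per the new hasLdsWaitVMSRC query), the fix clears the instruction's own waitvsrc operand instead of inserting a separate s_waitcnt_depctr. A toy sketch of the two paths; the struct field and counter are stand-ins for the MachineInstr operand and the BuildMI call:

    struct LdsDirInst { int WaitVsrc = 1; };

    // Either fold the wait into the LDSDIR instruction itself, or fall
    // back to emitting an explicit s_waitcnt_depctr vm_vsrc(0).
    bool fixHazard(LdsDirInst &MI, bool LdsdirCanWait, int &WaitsInserted) {
      if (LdsdirCanWait)
        MI.WaitVsrc = 0;
      else
        ++WaitsInserted;
      return true; // hazard handled either way
    }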
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f6f37f5170a4..85d062a9a6f5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1128,6 +1128,8 @@ public:
bool hasLdsDirect() const { return getGeneration() >= GFX11; }
+ bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
+
bool hasVALUPartialForwardingHazard() const {
return getGeneration() >= GFX11;
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index d539d75fdff0..201cc8d01e2d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -31,7 +31,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT,
InlineAsmEnd = ";#ASMEND";
//===--- Data Emission Directives -------------------------------------===//
- SunStyleELFSectionSwitchSyntax = true;
UsesELFSectionDirectiveForBSS = true;
//===--- Global Variable Emission Directives --------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6ddc7e864fb2..5a9222e91588 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8181,12 +8181,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// SGPR_NULL to avoid generating an extra s_mov with zero.
static SDValue selectSOffset(SDValue SOffset, SelectionDAG &DAG,
const GCNSubtarget *Subtarget) {
- if (Subtarget->hasRestrictedSOffset())
- if (auto SOffsetConst = dyn_cast<ConstantSDNode>(SOffset)) {
- if (SOffsetConst->isZero()) {
- return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32);
- }
- }
+ if (Subtarget->hasRestrictedSOffset() && isNullConstant(SOffset))
+ return DAG.getRegister(AMDGPU::SGPR_NULL, MVT::i32);
return SOffset;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 1cb1d32707f2..1f480c248154 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -292,7 +292,7 @@ public:
VgprVmemTypes[GprNo] = 0;
}
- void setNonKernelFunctionInitialState() {
+ void setStateOnFunctionEntryOrReturn() {
setScoreUB(VS_CNT, getWaitCountMax(VS_CNT));
PendingEvents |= WaitEventMaskForInst[VS_CNT];
}
@@ -1487,6 +1487,7 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
if (callWaitsOnFunctionReturn(Inst)) {
// Act as a wait on everything
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
+ ScoreBrackets->setStateOnFunctionEntryOrReturn();
} else {
// May need to wait for anything.
ScoreBrackets->applyWaitcnt(AMDGPU::Waitcnt());
@@ -1879,7 +1880,7 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
auto NonKernelInitialState =
std::make_unique<WaitcntBrackets>(ST, Limits, Encoding);
- NonKernelInitialState->setNonKernelFunctionInitialState();
+ NonKernelInitialState->setStateOnFunctionEntryOrReturn();
BlockInfos[&EntryBB].Incoming = std::move(NonKernelInitialState);
Modified = true;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index fee900b3efb2..e50f5f28e030 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5276,10 +5276,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_FLOOR_F32: return AMDGPU::V_FLOOR_F32_e64;
case AMDGPU::S_TRUNC_F32: return AMDGPU::V_TRUNC_F32_e64;
case AMDGPU::S_RNDNE_F32: return AMDGPU::V_RNDNE_F32_e64;
- case AMDGPU::S_CEIL_F16: return AMDGPU::V_CEIL_F16_t16_e64;
- case AMDGPU::S_FLOOR_F16: return AMDGPU::V_FLOOR_F16_t16_e64;
- case AMDGPU::S_TRUNC_F16: return AMDGPU::V_TRUNC_F16_t16_e64;
- case AMDGPU::S_RNDNE_F16: return AMDGPU::V_RNDNE_F16_t16_e64;
+ case AMDGPU::S_CEIL_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_CEIL_F16_t16_e64
+ : AMDGPU::V_CEIL_F16_fake16_e64;
+ case AMDGPU::S_FLOOR_F16:
+ return AMDGPU::V_FLOOR_F16_fake16_e64;
+ case AMDGPU::S_TRUNC_F16:
+ return AMDGPU::V_TRUNC_F16_fake16_e64;
+ case AMDGPU::S_RNDNE_F16:
+ return AMDGPU::V_RNDNE_F16_fake16_e64;
case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
@@ -5328,15 +5333,15 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_CMP_NEQ_F16: return AMDGPU::V_CMP_NEQ_F16_t16_e64;
case AMDGPU::S_CMP_NLT_F16: return AMDGPU::V_CMP_NLT_F16_t16_e64;
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
- case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_t16_e64;
+ case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
- case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_t16_e64;
+ case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
- case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_t16_e64;
+ case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
- case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_t16_e64;
+ case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
- case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_t16_e64;
+ case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
}
llvm_unreachable(
"Unexpected scalar opcode without corresponding vector one!");
@@ -7266,8 +7271,14 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
if (AMDGPU::getNamedOperandIdx(NewOpcode,
AMDGPU::OpName::src0_modifiers) >= 0)
NewInstr.addImm(0);
- if (AMDGPU::getNamedOperandIdx(NewOpcode, AMDGPU::OpName::src0) >= 0)
- NewInstr->addOperand(Inst.getOperand(1));
+ if (AMDGPU::hasNamedOperand(NewOpcode, AMDGPU::OpName::src0)) {
+ MachineOperand Src = Inst.getOperand(1);
+ if (AMDGPU::isTrue16Inst(NewOpcode) && ST.useRealTrue16Insts() &&
+ Src.isReg() && RI.isVGPR(MRI, Src.getReg()))
+ NewInstr.addReg(Src.getReg(), 0, AMDGPU::lo16);
+ else
+ NewInstr->addOperand(Src);
+ }
if (Opcode == AMDGPU::S_SEXT_I32_I8 || Opcode == AMDGPU::S_SEXT_I32_I16) {
// We are converting these to a BFE, so we need to add the missing
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index f07b8fa0ea4c..04c92155f5aa 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1773,28 +1773,27 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
class getInsVOP3Base<RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
bit HasClamp, bit HasModifiers, bit HasSrc2Mods, bit HasOMod,
- Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel,
- bit IsVOP3P> {
+ Operand Src0Mod, Operand Src1Mod, Operand Src2Mod, bit HasOpSel> {
// getInst64 handles clamp and omod. implicit mutex between vop3p and omod
dag base = getIns64 <Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
Src0Mod, Src1Mod, Src2Mod>.ret;
dag opsel = (ins op_sel0:$op_sel);
- dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
- dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi));
-
- dag ret = !con(base,
- !if(HasOpSel, opsel,(ins)),
- !if(IsVOP3P, vop3pFields,(ins)));
+ dag ret = !con(base, !if(HasOpSel, opsel, (ins)));
}
class getInsVOP3P <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs, bit HasClamp, bit HasOpSel,
Operand Src0Mod, Operand Src1Mod, Operand Src2Mod> {
- dag ret = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
+ dag base = getInsVOP3Base<Src0RC, Src1RC, Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/,
- 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod,
- HasOpSel, 1/*IsVOP3P*/>.ret;
+ 0/*HasOMod*/, Src0Mod, Src1Mod, Src2Mod, HasOpSel>.ret;
+
+ dag vop3pOpsel = (ins op_sel_hi0:$op_sel_hi);
+ dag vop3p_neg = (ins neg_lo0:$neg_lo, neg_hi0:$neg_hi);
+
+ dag vop3pFields = !con(!if(HasOpSel, vop3pOpsel, (ins)), vop3p_neg);
+ dag ret = !con(base, vop3pFields);
}
class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
@@ -1804,7 +1803,7 @@ class getInsVOP3OpSel <RegisterOperand Src0RC, RegisterOperand Src1RC,
dag ret = getInsVOP3Base<Src0RC, Src1RC,
Src2RC, NumSrcArgs,
HasClamp, 1/*HasModifiers*/, 1/*HasSrc2Mods*/, HasOMod,
- Src0Mod, Src1Mod, Src2Mod, 1/*HasOpSel*/, 0>.ret;
+ Src0Mod, Src1Mod, Src2Mod, /*HasOpSel=*/1>.ret;
}
class getInsDPPBase <RegisterOperand OldRC, RegisterClass Src0RC, RegisterClass Src1RC,
@@ -2390,9 +2389,15 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
field dag InsDPP8 = getInsDPP8<DstRCDPP, Src0DPP, Src1DPP, Src2DPP,
NumSrcArgs, HasModifiers,
Src0ModDPP, Src1ModDPP, Src2ModDPP>.ret;
- field dag InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
+ defvar InsVOP3DPPBase = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP,
Src2VOP3DPP, NumSrcArgs, HasClamp, HasModifiers, HasSrc2Mods, HasOMod,
- Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel, IsVOP3P>.ret;
+ Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP, HasOpSel>.ret;
+ defvar InsVOP3PDPPBase = getInsVOP3P<Src0VOP3DPP, Src1VOP3DPP,
+ Src2VOP3DPP, NumSrcArgs, HasClamp, HasOpSel,
+ Src0ModVOP3DPP, Src1ModVOP3DPP, Src2ModVOP3DPP>.ret;
+
+ field dag InsVOP3Base = !if(IsVOP3P, InsVOP3PDPPBase, InsVOP3DPPBase);
+
field dag InsVOP3DPP = getInsVOP3DPP<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
field dag InsVOP3DPP16 = getInsVOP3DPP16<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
field dag InsVOP3DPP8 = getInsVOP3DPP8<InsVOP3Base, DstRCVOP3DPP, NumSrcArgs>.ret;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 27a7c29cb1ac..99960c94e598 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -74,6 +74,7 @@ class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
// copy relevant pseudo op flags
let SubtargetPredicate = ps.SubtargetPredicate;
+ let OtherPredicates = ps.OtherPredicates;
let AsmMatchConverter = ps.AsmMatchConverter;
let AsmVariantName = ps.AsmVariantName;
let Constraints = ps.Constraints;
@@ -157,8 +158,11 @@ multiclass VOP1Inst_t16<string opName,
let OtherPredicates = [NotHasTrue16BitInsts, Has16BitInsts] in {
defm NAME : VOP1Inst<opName, P, node>;
}
- let OtherPredicates = [HasTrue16BitInsts] in {
- defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_Fake16<P>, node>;
+ let OtherPredicates = [UseRealTrue16Insts] in {
+ defm _t16 : VOP1Inst<opName#"_t16", VOPProfile_True16<P>, node>;
+ }
+ let OtherPredicates = [UseFakeTrue16Insts] in {
+ defm _fake16 : VOP1Inst<opName#"_fake16", VOPProfile_Fake16<P>, node>;
}
}
@@ -679,6 +683,7 @@ class VOP1_DPP<bits<8> op, VOP1_DPP_Pseudo ps, VOPProfile p = ps.Pfl, bit isDPP1
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
let TRANS = ps.TRANS;
+ let OtherPredicates = ps.OtherPredicates;
bits<8> vdst;
let Inst{8-0} = 0xfa;
@@ -707,6 +712,7 @@ class VOP1_DPP8<bits<8> op, VOP1_Pseudo ps, VOPProfile p = ps.Pfl> :
let Defs = ps.Defs;
let SchedRW = ps.SchedRW;
let Uses = ps.Uses;
+ let OtherPredicates = ps.OtherPredicates;
bits<8> vdst;
let Inst{8-0} = fi;
@@ -742,7 +748,9 @@ multiclass VOP1_Real_e32<GFXGen Gen, bits<9> op, string opName = NAME> {
multiclass VOP1_Real_e32_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.AsmOperands in {
+ let AsmString = asmName # ps.AsmOperands,
+ DecoderNamespace = Gen.DecoderNamespace #
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
defm NAME : VOP1_Real_e32<Gen, op, opName>;
}
}
@@ -761,7 +769,9 @@ multiclass VOP1_Real_dpp<GFXGen Gen, bits<9> op, string opName = NAME> {
multiclass VOP1_Real_dpp_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP16 in {
+ let AsmString = asmName # ps.Pfl.AsmDPP16,
+ DecoderNamespace = "DPP" # Gen.DecoderNamespace #
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
defm NAME : VOP1_Real_dpp<Gen, op, opName>;
}
}
@@ -774,7 +784,9 @@ multiclass VOP1_Real_dpp8<GFXGen Gen, bits<9> op, string opName = NAME> {
multiclass VOP1_Real_dpp8_with_name<GFXGen Gen, bits<9> op, string opName,
string asmName> {
defvar ps = !cast<VOP1_Pseudo>(opName#"_e32");
- let AsmString = asmName # ps.Pfl.AsmDPP8 in {
+ let AsmString = asmName # ps.Pfl.AsmDPP8,
+ DecoderNamespace = "DPP8" # Gen.DecoderNamespace #
+ !if(ps.Pfl.IsRealTrue16, "", "_FAKE16") in {
defm NAME : VOP1_Real_dpp8<Gen, op, opName>;
}
}
@@ -854,29 +866,30 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
"V_FFBH_I32", "v_cls_i32">;
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
-defm V_NOT_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
-defm V_CVT_I32_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
-defm V_CVT_U32_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
+defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
+defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
+defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
defm V_CVT_F16_U16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
defm V_CVT_F16_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
defm V_CVT_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x052, "v_cvt_u16_f16">;
defm V_CVT_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x053, "v_cvt_i16_f16">;
-defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
-defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
-defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
-defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
-defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
-defm V_FREXP_MANT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
+defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
+defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
+defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
+defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
+defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
+defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
-defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
+defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
-defm V_TRUNC_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
-defm V_RNDNE_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
-defm V_FRACT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
-defm V_SIN_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
-defm V_COS_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
-defm V_SAT_PK_U8_I16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
+defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
+defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
+defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
+defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
+defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
+defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
+defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
defm V_CVT_NORM_I16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
defm V_CVT_NORM_U16_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
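The fake16 records above land in their own disassembler tables via the DecoderNamespace string built in the multiclasses earlier in this hunk (mirrored by the VOP2/VOP3 changes below): true16 encodings stay in the plain per-generation table, fake16 encodings get a _FAKE16 suffix. A minimal standalone C++ sketch of that selection; "GFX11" is an illustrative namespace value, not taken from this diff:

#include <cassert>
#include <string>

// Mirrors the TableGen concatenation
//   DecoderNamespace = Prefix # Gen.DecoderNamespace #
//                      !if(ps.Pfl.IsRealTrue16, "", "_FAKE16")
std::string decoderNamespace(const std::string &Prefix,
                             const std::string &GenNS, bool IsRealTrue16) {
  return Prefix + GenNS + (IsRealTrue16 ? "" : "_FAKE16");
}

int main() {
  assert(decoderNamespace("DPP", "GFX11", /*IsRealTrue16=*/true) == "DPPGFX11");
  assert(decoderNamespace("DPP8", "GFX11", /*IsRealTrue16=*/false) ==
         "DPP8GFX11_FAKE16");
  return 0;
}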
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index ecee61daa1c8..48d4e259bc1c 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -111,8 +111,8 @@ class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily, string real_name = ps.Mnemo
class VOP2_Real_Gen <VOP2_Pseudo ps, GFXGen Gen, string real_name = ps.Mnemonic> :
VOP2_Real <ps, Gen.Subtarget, real_name> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -437,7 +437,7 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
let InsDPP16 = !con(InsDPP, (ins FI:$fi));
let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
0, HasModifiers, HasModifiers, HasOMod,
- Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret;
+ Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel>.ret;
// We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
let InsVOPDXDeferred =
@@ -1275,8 +1275,8 @@ class VOP2_DPP16<bits<6> op, VOP2_DPP_Pseudo ps, int subtarget,
class VOP2_DPP16_Gen<bits<6> op, VOP2_DPP_Pseudo ps, GFXGen Gen,
string opName = ps.OpName, VOPProfile p = ps.Pfl> :
VOP2_DPP16<op, ps, Gen.Subtarget, opName, p> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1304,8 +1304,8 @@ class VOP2_DPP8<bits<6> op, VOP2_Pseudo ps,
class VOP2_DPP8_Gen<bits<6> op, VOP2_Pseudo ps, GFXGen Gen,
VOPProfile p = ps.Pfl> :
VOP2_DPP8<op, ps, p> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index fd4626d902ac..c4b9e7063093 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -208,8 +208,8 @@ class VOP3_Real <VOP_Pseudo ps, int EncodingFamily, string asm_name = ps.Mnemoni
class VOP3_Real_Gen <VOP_Pseudo ps, GFXGen Gen, string asm_name = ps.Mnemonic> :
VOP3_Real <ps, Gen.Subtarget, asm_name> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1340,8 +1340,8 @@ class VOP3_DPP16<bits<10> op, VOP_DPP_Pseudo ps, int subtarget,
class VOP3_DPP16_Gen<bits<10> op, VOP_DPP_Pseudo ps, GFXGen Gen,
string opName = ps.OpName> :
VOP3_DPP16 <op, ps, Gen.Subtarget, opName> {
- let AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate);
+ let AssemblerPredicate = Gen.AssemblerPredicate;
+ let OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts], []);
let DecoderNamespace = "DPP"#Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16");
}
@@ -1470,9 +1470,8 @@ multiclass VOP3_Real_dpp8_with_name<GFXGen Gen, bits<10> op, string opName,
let AsmString = asmName # ps.Pfl.AsmVOP3DPP8,
DecoderNamespace = "DPP8"#Gen.DecoderNamespace#
!if(ps.Pfl.IsRealTrue16, "", "_FAKE16"),
- AssemblerPredicate = !if(ps.Pfl.IsRealTrue16, UseRealTrue16Insts,
- Gen.AssemblerPredicate) in {
-
+ OtherPredicates = !if(ps.Pfl.IsRealTrue16, [UseRealTrue16Insts],
+ [TruePredicate]) in {
defm NAME : VOP3_Real_dpp8_Base<Gen, op, opName>;
}
}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 568085bd0ab3..f8a281032c77 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -9577,8 +9577,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
SmallVector<SDValue, 8> Ops;
SDLoc dl(N);
for (unsigned i = 0; i != NumElts; ++i) {
- ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
- const APInt &CInt = C->getAPIntValue();
+ const APInt &CInt = N->getConstantOperandAPInt(i);
// Element types smaller than 32 bits are not legal, so use i32 elements.
// The values are implicitly truncated so sext vs. zext doesn't matter.
Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
@@ -18080,8 +18079,7 @@ SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &D
SDValue Op0 = CMOV->getOperand(0);
SDValue Op1 = CMOV->getOperand(1);
- auto CCNode = cast<ConstantSDNode>(CMOV->getOperand(2));
- auto CC = CCNode->getAPIntValue().getLimitedValue();
+ auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
SDValue CmpZ = CMOV->getOperand(4);
// The compare must be against zero.
@@ -20109,8 +20107,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// The operand to BFI is already a mask suitable for removing the bits it
// sets.
- ConstantSDNode *CI = cast<ConstantSDNode>(Op.getOperand(2));
- const APInt &Mask = CI->getAPIntValue();
+ const APInt &Mask = Op.getConstantOperandAPInt(2);
Known.Zero &= Mask;
Known.One &= Mask;
return;
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index e68904863cfc..fc066f001316 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -1149,15 +1149,10 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// but they are different from CMP.
// FIXME: since we're doing a post-processing, use a pseudoinstr here, so
// lowering & isel wouldn't diverge.
- bool andCC = false;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
- if (RHSC->isZero() && LHS.hasOneUse() &&
- (LHS.getOpcode() == ISD::AND ||
- (LHS.getOpcode() == ISD::TRUNCATE &&
- LHS.getOperand(0).getOpcode() == ISD::AND))) {
- andCC = true;
- }
- }
+ bool andCC = isNullConstant(RHS) && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::AND ||
+ (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getOpcode() == ISD::AND));
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
SDValue TargetCC;
SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index c65090d915ef..34c5569b8076 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2019,9 +2019,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DL, RetTy, Args, Outs, retAlignment,
HasVAArgs
? std::optional<std::pair<unsigned, const APInt &>>(std::make_pair(
- CLI.NumFixedArgs,
- cast<ConstantSDNode>(VADeclareParam->getOperand(1))
- ->getAPIntValue()))
+ CLI.NumFixedArgs, VADeclareParam->getConstantOperandAPInt(1)))
: std::nullopt,
*CB, UniqueCallSite);
const char *ProtoStr = nvTM->getStrPool().save(Proto).data();
@@ -2297,7 +2295,7 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
if (VT == MVT::v2f16 || VT == MVT::v2bf16)
Value = cast<ConstantFPSDNode>(Operand)->getValueAPF().bitcastToAPInt();
else if (VT == MVT::v2i16 || VT == MVT::v4i8)
- Value = cast<ConstantSDNode>(Operand)->getAPIntValue();
+ Value = Operand->getAsAPIntVal();
else
llvm_unreachable("Unsupported type");
// i8 values are carried around as i16, so we need to zero out upper bits,
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 13665985f52e..e1cced327544 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -164,6 +164,9 @@ def True : Predicate<"true">;
class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
+// Explicit records for arch-accelerated SM versions
+def hasSM90a : Predicate<"Subtarget->getFullSmVersion() == 901">;
+
// non-sync shfl instructions are not available on sm_70+ in PTX6.4+
def hasSHFL : Predicate<"!(Subtarget->getSmVersion() >= 70"
"&& Subtarget->getPTXVersion() >= 64)">;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 85eae44f349a..6b062a7f3912 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -6727,3 +6727,16 @@ def is_explicit_cluster: NVPTXInst<(outs Int1Regs:$d), (ins),
"mov.pred\t$d, %is_explicit_cluster;",
[(set Int1Regs:$d, (int_nvvm_is_explicit_cluster))]>,
Requires<[hasSM<90>, hasPTX<78>]>;
+
+// setmaxnreg inc/dec intrinsics
+let isConvergent = true in {
+multiclass SET_MAXNREG<string Action, Intrinsic Intr> {
+ def : NVPTXInst<(outs), (ins i32imm:$reg_count),
+ "setmaxnreg." # Action # ".sync.aligned.u32 $reg_count;",
+ [(Intr timm:$reg_count)]>,
+ Requires<[hasSM90a, hasPTX<80>]>;
+}
+
+defm INT_SET_MAXNREG_INC : SET_MAXNREG<"inc", int_nvvm_setmaxnreg_inc_sync_aligned_u32>;
+defm INT_SET_MAXNREG_DEC : SET_MAXNREG<"dec", int_nvvm_setmaxnreg_dec_sync_aligned_u32>;
+} // isConvergent
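hasSM90a intentionally tests an exact full SM version rather than the usual ">=" bound: arch-accelerated features such as setmaxnreg are specific to sm_90a and do not carry forward to later architectures. A standalone C++ sketch of the two predicate shapes; plain functions, not the TableGen predicates, with the 901 encoding for sm_90a taken from the diff itself:

#include <cassert>

// hasSM<90>-style check: a lower bound that later architectures also satisfy.
bool hasSM(unsigned SmVersion, unsigned Min) { return SmVersion >= Min; }

// hasSM90a-style check: exact match on the full SM version (sm_90a == 901).
bool hasSM90a(unsigned FullSmVersion) { return FullSmVersion == 901; }

int main() {
  assert(hasSM(90, 90) && !hasSM90a(900)); // plain sm_90 is not sm_90a
  assert(hasSM90a(901));                   // only sm_90a qualifies
  return 0;
}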
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 235df1880b37..4e164fda1d8d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16241,7 +16241,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
// Since we are doing this pre-legalize, the RHS can be a constant of
// arbitrary bitwidth which may cause issues when trying to get the value
// from the underlying APInt.
- auto RHSAPInt = cast<ConstantSDNode>(RHS)->getAPIntValue();
+ auto RHSAPInt = RHS->getAsAPIntVal();
if (!RHSAPInt.isIntN(64))
break;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index b1601739fd45..bf756e39bd5d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1909,7 +1909,7 @@ def STWAT : X_RD5_RS5_IM5<31, 710, (outs), (ins gprc:$RST, gprc:$RA, u5imm:$RB),
"stwat $RST, $RA, $RB", IIC_LdStStore>,
Requires<[IsISA3_0]>;
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+let isTrap = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", IIC_LdStLoad, [(trap)]>;
def TWI : DForm_base<3, (outs), (ins u5imm:$RST, gprc:$RA, s16imm:$D, variable_ops),
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index d616aaeddf41..7d42481db57f 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -199,6 +199,8 @@ class RISCVAsmParser : public MCTargetAsmParser {
ParseStatus parseInsnDirectiveOpcode(OperandVector &Operands);
ParseStatus parseInsnCDirectiveOpcode(OperandVector &Operands);
ParseStatus parseGPRAsFPR(OperandVector &Operands);
+ template <bool IsRV64Inst> ParseStatus parseGPRPair(OperandVector &Operands);
+ ParseStatus parseGPRPair(OperandVector &Operands, bool IsRV64Inst);
ParseStatus parseFRMArg(OperandVector &Operands);
ParseStatus parseFenceArg(OperandVector &Operands);
ParseStatus parseReglist(OperandVector &Operands);
@@ -466,6 +468,12 @@ public:
bool isGPRAsFPR() const { return isGPR() && Reg.IsGPRAsFPR; }
+ bool isGPRPair() const {
+ return Kind == KindTy::Register &&
+ RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(
+ Reg.RegNum);
+ }
+
static bool evaluateConstantImm(const MCExpr *Expr, int64_t &Imm,
RISCVMCExpr::VariantKind &VK) {
if (auto *RE = dyn_cast<RISCVMCExpr>(Expr)) {
@@ -1295,11 +1303,15 @@ unsigned RISCVAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
for (unsigned I = 0; I < MCID.NumOperands; ++I) {
- if (MCID.operands()[I].RegClass == RISCV::GPRPF64RegClassID) {
+ if (MCID.operands()[I].RegClass == RISCV::GPRPairRegClassID) {
const auto &Op = Inst.getOperand(I);
assert(Op.isReg());
MCRegister Reg = Op.getReg();
+ if (RISCVMCRegisterClasses[RISCV::GPRPairRegClassID].contains(Reg))
+ continue;
+
+ // FIXME: We should form a paired register during parsing/matching.
if (((Reg.id() - RISCV::X0) & 1) != 0)
return Match_RequiresEvenGPRs;
}
@@ -2222,6 +2234,48 @@ ParseStatus RISCVAsmParser::parseGPRAsFPR(OperandVector &Operands) {
return ParseStatus::Success;
}
+template <bool IsRV64>
+ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands) {
+ return parseGPRPair(Operands, IsRV64);
+}
+
+ParseStatus RISCVAsmParser::parseGPRPair(OperandVector &Operands,
+ bool IsRV64Inst) {
+ // If this is not an RV64 GPRPair instruction, don't parse as a GPRPair on
+ // RV64 as it will prevent matching the RV64 version of the same instruction
+ // that doesn't use a GPRPair.
+ // If this is an RV64 GPRPair instruction, there is no RV32 version so we can
+ // still parse as a pair.
+ if (!IsRV64Inst && isRV64())
+ return ParseStatus::NoMatch;
+
+ if (getLexer().isNot(AsmToken::Identifier))
+ return ParseStatus::NoMatch;
+
+ StringRef Name = getLexer().getTok().getIdentifier();
+ MCRegister RegNo = matchRegisterNameHelper(isRVE(), Name);
+
+ if (!RegNo)
+ return ParseStatus::NoMatch;
+
+ if (!RISCVMCRegisterClasses[RISCV::GPRRegClassID].contains(RegNo))
+ return ParseStatus::NoMatch;
+
+ if ((RegNo - RISCV::X0) & 1)
+ return TokError("register must be even");
+
+ SMLoc S = getLoc();
+ SMLoc E = SMLoc::getFromPointer(S.getPointer() + Name.size());
+ getLexer().Lex();
+
+ const MCRegisterInfo *RI = getContext().getRegisterInfo();
+ unsigned Pair = RI->getMatchingSuperReg(
+ RegNo, RISCV::sub_gpr_even,
+ &RISCVMCRegisterClasses[RISCV::GPRPairRegClassID]);
+ Operands.push_back(RISCVOperand::createReg(Pair, S, E));
+ return ParseStatus::Success;
+}
+
ParseStatus RISCVAsmParser::parseFRMArg(OperandVector &Operands) {
if (getLexer().isNot(AsmToken::Identifier))
return TokError(
@@ -3335,27 +3389,6 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
return Error(Loc, "Operand must be constant 4.");
}
- bool IsAMOCAS_D = Opcode == RISCV::AMOCAS_D || Opcode == RISCV::AMOCAS_D_AQ ||
- Opcode == RISCV::AMOCAS_D_RL ||
- Opcode == RISCV::AMOCAS_D_AQ_RL;
- bool IsAMOCAS_Q = Opcode == RISCV::AMOCAS_Q || Opcode == RISCV::AMOCAS_Q_AQ ||
- Opcode == RISCV::AMOCAS_Q_RL ||
- Opcode == RISCV::AMOCAS_Q_AQ_RL;
- if ((!isRV64() && IsAMOCAS_D) || IsAMOCAS_Q) {
- unsigned Rd = Inst.getOperand(0).getReg();
- unsigned Rs2 = Inst.getOperand(2).getReg();
- assert(Rd >= RISCV::X0 && Rd <= RISCV::X31);
- if ((Rd - RISCV::X0) % 2 != 0) {
- SMLoc Loc = Operands[1]->getStartLoc();
- return Error(Loc, "The destination register must be even.");
- }
- assert(Rs2 >= RISCV::X0 && Rs2 <= RISCV::X31);
- if ((Rs2 - RISCV::X0) % 2 != 0) {
- SMLoc Loc = Operands[2]->getStartLoc();
- return Error(Loc, "The source register must be even.");
- }
- }
-
const MCInstrDesc &MCID = MII.get(Opcode);
if (!(MCID.TSFlags & RISCVII::ConstraintMask))
return false;
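The parser above enforces the pairing invariant: only an even-numbered GPR can open a pair, and the matched super-register covers that register (sub_gpr_even) together with its odd successor (sub_gpr_odd). A standalone C++ sketch of the invariant; plain helpers, not the LLVM MC API:

#include <cassert>

// An odd register can never start a pair -- the "register must be even"
// diagnostic in parseGPRPair.
bool canStartPair(unsigned XIndex) { return (XIndex & 1) == 0; }

// The odd half covered by the pair starting at an even register.
unsigned pairOddHalf(unsigned EvenXIndex) { return EvenXIndex + 1; }

int main() {
  assert(canStartPair(10) && pairOddHalf(10) == 11); // x10 pairs as X10_X11
  assert(!canStartPair(5));                          // x5 is rejected
  return 0;
}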
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index ed80da14c795..4dd039159e29 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -171,7 +171,7 @@ static DecodeStatus DecodeGPRCRegisterClass(MCInst &Inst, uint32_t RegNo,
return MCDisassembler::Success;
}
-static DecodeStatus DecodeGPRPF64RegisterClass(MCInst &Inst, uint32_t RegNo,
+static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
if (RegNo >= 32 || RegNo & 1)
@@ -546,6 +546,10 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
!STI.hasFeature(RISCV::Feature64Bit),
DecoderTableRV32Zdinx32,
"RV32Zdinx table (Double in Integer and rv32)");
+ TRY_TO_DECODE(STI.hasFeature(RISCV::FeatureStdExtZacas) &&
+ !STI.hasFeature(RISCV::Feature64Bit),
+ DecoderTableRV32Zacas32,
+ "RV32Zacas table (Compare-And-Swap and rv32)");
TRY_TO_DECODE_FEATURE(RISCV::FeatureStdExtZfinx, DecoderTableRVZfinx32,
"RVZfinx table (Float in Integer)");
TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXVentanaCondOps,
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index ab8070772fe5..ae02e86baf6e 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -47,10 +47,50 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
const LLT s32 = LLT::scalar(32);
const LLT s64 = LLT::scalar(64);
+ const LLT nxv1s8 = LLT::scalable_vector(1, s8);
+ const LLT nxv2s8 = LLT::scalable_vector(2, s8);
+ const LLT nxv4s8 = LLT::scalable_vector(4, s8);
+ const LLT nxv8s8 = LLT::scalable_vector(8, s8);
+ const LLT nxv16s8 = LLT::scalable_vector(16, s8);
+ const LLT nxv32s8 = LLT::scalable_vector(32, s8);
+ const LLT nxv64s8 = LLT::scalable_vector(64, s8);
+
+ const LLT nxv1s16 = LLT::scalable_vector(1, s16);
+ const LLT nxv2s16 = LLT::scalable_vector(2, s16);
+ const LLT nxv4s16 = LLT::scalable_vector(4, s16);
+ const LLT nxv8s16 = LLT::scalable_vector(8, s16);
+ const LLT nxv16s16 = LLT::scalable_vector(16, s16);
+ const LLT nxv32s16 = LLT::scalable_vector(32, s16);
+
+ const LLT nxv1s32 = LLT::scalable_vector(1, s32);
+ const LLT nxv2s32 = LLT::scalable_vector(2, s32);
+ const LLT nxv4s32 = LLT::scalable_vector(4, s32);
+ const LLT nxv8s32 = LLT::scalable_vector(8, s32);
+ const LLT nxv16s32 = LLT::scalable_vector(16, s32);
+
+ const LLT nxv1s64 = LLT::scalable_vector(1, s64);
+ const LLT nxv2s64 = LLT::scalable_vector(2, s64);
+ const LLT nxv4s64 = LLT::scalable_vector(4, s64);
+ const LLT nxv8s64 = LLT::scalable_vector(8, s64);
+
using namespace TargetOpcode;
+ auto AllVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
+ nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
+ nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
+ nxv1s64, nxv2s64, nxv4s64, nxv8s64};
+
getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
.legalFor({s32, sXLen})
+ .legalIf(all(
+ typeInSet(0, AllVecTys),
+ LegalityPredicate([=, &ST](const LegalityQuery &Query) {
+ return ST.hasVInstructions() &&
+ (Query.Types[0].getScalarSizeInBits() != 64 ||
+ ST.hasVInstructionsI64()) &&
+ (Query.Types[0].getElementCount().getKnownMinValue() != 1 ||
+ ST.getELen() == 64);
+ })))
.widenScalarToNextPow2(0)
.clampScalar(0, s32, sXLen);
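The legality predicate combines three subtarget constraints: vector support must be present at all, 64-bit element types additionally require the I64 vector instructions, and single-element scalable types additionally require ELEN to be 64. A standalone C++ restatement under illustrative type names; this is not the GlobalISel LegalityQuery API:

#include <cassert>

struct VecTy { unsigned MinElts; unsigned EltBits; };        // e.g. nxv4s32
struct Subtarget { bool HasV; bool HasVI64; unsigned ELen; };

bool isLegalVecBinOp(const VecTy &Ty, const Subtarget &ST) {
  if (!ST.HasV)
    return false;
  if (Ty.EltBits == 64 && !ST.HasVI64)  // nxvNs64 needs 64-bit elements
    return false;
  if (Ty.MinElts == 1 && ST.ELen != 64) // nxv1sN needs ELEN == 64
    return false;
  return true;
}

int main() {
  Subtarget Zve32x{true, false, 32};         // illustrative Zve32x-like config
  assert(!isLegalVecBinOp({4, 64}, Zve32x)); // no 64-bit elements
  assert(!isLegalVecBinOp({1, 32}, Zve32x)); // nxv1s32 needs ELEN = 64
  assert(isLegalVecBinOp({2, 32}, Zve32x));
  return 0;
}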
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
index 0799267eaf7c..76e5b3ed4025 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -106,6 +106,8 @@ unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
if (Expr->getKind() == MCExpr::Target &&
cast<RISCVMCExpr>(Expr)->getKind() == RISCVMCExpr::VK_RISCV_32_PCREL)
return ELF::R_RISCV_32_PCREL;
+ if (Target.getSymA()->getKind() == MCSymbolRefExpr::VK_GOTPCREL)
+ return ELF::R_RISCV_GOT32_PCREL;
return ELF::R_RISCV_32;
case FK_Data_8:
return ELF::R_RISCV_64;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 9db5148208b3..961b8f0afe22 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -37,6 +37,13 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
auto &MAB = static_cast<RISCVAsmBackend &>(MCA.getBackend());
setTargetABI(RISCVABI::computeTargetABI(STI.getTargetTriple(), Features,
MAB.getTargetOptions().getABIName()));
+  // `j label` in `.option norelax; j label; .option relax; ...; label:` needs a
+  // relocation to ensure the jump target is correct after linking. This is due
+  // to a limitation of shouldForceRelocation: it has to make the decision
+  // upfront, without knowing whether a future .option relax will appear. When
+  // RISCVAsmParser is used, its ParseInstruction may call setForceRelocs as well.
+ if (STI.hasFeature(RISCV::FeatureRelax))
+ static_cast<RISCVAsmBackend &>(MAB).setForceRelocs();
}
RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() {
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 103a2e2da7b9..ed2b1ceb7d6f 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -308,8 +308,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
DebugLoc DL = MBBI->getDebugLoc();
const TargetRegisterInfo *TRI = STI->getRegisterInfo();
- Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32);
- Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi);
+ Register Lo =
+ TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even);
+ Register Hi =
+ TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd);
BuildMI(MBB, MBBI, DL, TII->get(RISCV::SW))
.addReg(Lo, getKillRegState(MBBI->getOperand(0).isKill()))
.addReg(MBBI->getOperand(1).getReg())
@@ -342,8 +344,10 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) {
DebugLoc DL = MBBI->getDebugLoc();
const TargetRegisterInfo *TRI = STI->getRegisterInfo();
- Register Lo = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32);
- Register Hi = TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_32_hi);
+ Register Lo =
+ TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_even);
+ Register Hi =
+ TRI->getSubReg(MBBI->getOperand(0).getReg(), RISCV::sub_gpr_odd);
// If the register of operand 1 is equal to the Lo register, then swap the
// order of loading the Lo and Hi statements.
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index bb7a3291085d..279509575bb5 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -736,6 +736,7 @@ def FeatureStdExtZacas
def HasStdExtZacas : Predicate<"Subtarget->hasStdExtZacas()">,
AssemblerPredicate<(all_of FeatureStdExtZacas),
"'Zacas' (Atomic Compare-And-Swap Instructions)">;
+def NoStdExtZacas : Predicate<"!Subtarget->hasStdExtZacas()">;
//===----------------------------------------------------------------------===//
// Vendor extensions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 0a1a466af591..cb9ffabc4123 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -138,7 +138,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit())
addRegisterClass(MVT::f64, &RISCV::GPRRegClass);
else
- addRegisterClass(MVT::f64, &RISCV::GPRPF64RegClass);
+ addRegisterClass(MVT::f64, &RISCV::GPRPairRegClass);
}
static const MVT::SimpleValueType BoolVecVTs[] = {
@@ -814,8 +814,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}, VT,
Custom);
setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
- setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
- ISD::SSUBSAT, ISD::USUBSAT},
+ setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
+ ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
VT, Legal);
// Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
@@ -1185,8 +1185,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV())
setOperationAction({ISD::MULHS, ISD::MULHU}, VT, Custom);
- setOperationAction({ISD::AVGFLOORU, ISD::SADDSAT, ISD::UADDSAT,
- ISD::SSUBSAT, ISD::USUBSAT},
+ setOperationAction({ISD::AVGFLOORU, ISD::AVGCEILU, ISD::SADDSAT,
+ ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT},
VT, Custom);
setOperationAction(ISD::VSELECT, VT, Custom);
@@ -5466,6 +5466,7 @@ static unsigned getRISCVVLOp(SDValue Op) {
OP_CASE(SSUBSAT)
OP_CASE(USUBSAT)
OP_CASE(AVGFLOORU)
+ OP_CASE(AVGCEILU)
OP_CASE(FADD)
OP_CASE(FSUB)
OP_CASE(FMUL)
@@ -5570,7 +5571,7 @@ static bool hasMergeOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -5596,7 +5597,7 @@ static bool hasMaskOp(unsigned Opcode) {
Opcode <= RISCVISD::LAST_RISCV_STRICTFP_OPCODE &&
"not a RISC-V target specific op");
static_assert(RISCVISD::LAST_VL_VECTOR_OP - RISCVISD::FIRST_VL_VECTOR_OP ==
- 125 &&
+ 126 &&
RISCVISD::LAST_RISCV_STRICTFP_OPCODE -
ISD::FIRST_TARGET_STRICTFP_OPCODE ==
21 &&
@@ -6461,6 +6462,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return SplitVectorOp(Op, DAG);
[[fallthrough]];
case ISD::AVGFLOORU:
+ case ISD::AVGCEILU:
case ISD::SADDSAT:
case ISD::UADDSAT:
case ISD::SSUBSAT:
@@ -7023,8 +7025,7 @@ foldBinOpIntoSelectIfProfitable(SDNode *BO, SelectionDAG &DAG,
if (!NewConstOp)
return SDValue();
- const APInt &NewConstAPInt =
- cast<ConstantSDNode>(NewConstOp)->getAPIntValue();
+ const APInt &NewConstAPInt = NewConstOp->getAsAPIntVal();
if (!NewConstAPInt.isZero() && !NewConstAPInt.isAllOnes())
return SDValue();
@@ -7154,8 +7155,8 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// is SETGE/SETLE to avoid an XORI.
if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
CCVal == ISD::SETLT) {
- const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
- const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
+ const APInt &TrueVal = TrueV->getAsAPIntVal();
+ const APInt &FalseVal = FalseV->getAsAPIntVal();
if (TrueVal - 1 == FalseVal)
return DAG.getNode(ISD::ADD, DL, VT, CondV, FalseV);
if (TrueVal + 1 == FalseVal)
@@ -16345,7 +16346,7 @@ static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
Register SrcReg = MI.getOperand(2).getReg();
const TargetRegisterClass *SrcRC = MI.getOpcode() == RISCV::SplitF64Pseudo_INX
- ? &RISCV::GPRPF64RegClass
+ ? &RISCV::GPRPairRegClass
: &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
@@ -16384,7 +16385,7 @@ static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
Register HiReg = MI.getOperand(2).getReg();
const TargetRegisterClass *DstRC =
- MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPF64RegClass
+ MI.getOpcode() == RISCV::BuildPairF64Pseudo_INX ? &RISCV::GPRPairRegClass
: &RISCV::FPR64RegClass;
int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
@@ -18596,6 +18597,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(UREM_VL)
NODE_NAME_CASE(XOR_VL)
NODE_NAME_CASE(AVGFLOORU_VL)
+ NODE_NAME_CASE(AVGCEILU_VL)
NODE_NAME_CASE(SADDSAT_VL)
NODE_NAME_CASE(UADDSAT_VL)
NODE_NAME_CASE(SSUBSAT_VL)
@@ -18752,7 +18754,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
if (VT == MVT::f32 && Subtarget.hasStdExtZfinx())
return std::make_pair(0U, &RISCV::GPRF32RegClass);
if (VT == MVT::f64 && Subtarget.hasStdExtZdinx() && !Subtarget.is64Bit())
- return std::make_pair(0U, &RISCV::GPRPF64RegClass);
+ return std::make_pair(0U, &RISCV::GPRPairRegClass);
return std::make_pair(0U, &RISCV::GPRNoX0RegClass);
case 'f':
if (Subtarget.hasStdExtZfhmin() && VT == MVT::f16)
@@ -18934,7 +18936,7 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// Subtarget into account.
if (Res.second == &RISCV::GPRF16RegClass ||
Res.second == &RISCV::GPRF32RegClass ||
- Res.second == &RISCV::GPRPF64RegClass)
+ Res.second == &RISCV::GPRPairRegClass)
return std::make_pair(Res.first, &RISCV::GPRRegClass);
return Res;
@@ -19362,6 +19364,11 @@ bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
return false;
}
+ISD::NodeType RISCVTargetLowering::getExtendForAtomicCmpSwapArg() const {
+ // Zacas will use amocas.w which does not require extension.
+ return Subtarget.hasStdExtZacas() ? ISD::ANY_EXTEND : ISD::SIGN_EXTEND;
+}
+
Register RISCVTargetLowering::getExceptionPointerRegister(
const Constant *PersonalityFn) const {
return RISCV::X10;
@@ -20017,8 +20024,13 @@ unsigned RISCVTargetLowering::getCustomCtpopCost(EVT VT,
}
bool RISCVTargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
- // At the moment, the only scalable instruction GISel knows how to lower is
- // ret with scalable argument.
+
+ // GISel support is in progress or complete for G_ADD, G_SUB, G_AND, G_OR, and
+ // G_XOR.
+ unsigned Op = Inst.getOpcode();
+ if (Op == Instruction::Add || Op == Instruction::Sub ||
+ Op == Instruction::And || Op == Instruction::Or || Op == Instruction::Xor)
+ return false;
if (Inst.getType()->isScalableTy())
return true;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 5d51fe168b04..c65953e37b17 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -255,6 +255,8 @@ enum NodeType : unsigned {
// Averaging adds of unsigned integers.
AVGFLOORU_VL,
+ // Rounding averaging adds of unsigned integers.
+ AVGCEILU_VL,
MULHS_VL,
MULHU_VL,
@@ -631,9 +633,7 @@ public:
return ISD::SIGN_EXTEND;
}
- ISD::NodeType getExtendForAtomicCmpSwapArg() const override {
- return ISD::SIGN_EXTEND;
- }
+ ISD::NodeType getExtendForAtomicCmpSwapArg() const override;
bool shouldTransformSignedTruncationCheck(EVT XVT,
unsigned KeptBits) const override;
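The reason ANY_EXTEND becomes safe with Zacas: an lr.w/sc.w expansion compares the full 64-bit register against the sign-extended loaded word, so the expected value must be sign-extended to match, while amocas.w performs the comparison 32 bits wide in memory and ignores the upper register bits. A standalone C++ sketch of the difference; plain integer code, not LLVM:

#include <cassert>
#include <cstdint>

// Sign-extend a 32-bit word into a 64-bit register, as lr.w does.
uint64_t lrwLoad(uint32_t Word) { return (uint64_t)(int64_t)(int32_t)Word; }

int main() {
  uint32_t Mem = 0x80000000u;        // negative when read as int32_t
  uint64_t AnyExt = (uint64_t)Mem;   // any-extended expected value (zeros here)

  // lr.w/sc.w loop: a 64-bit register compare, so the any-extended expected
  // value spuriously mismatches; SIGN_EXTEND is required.
  assert(lrwLoad(Mem) != AnyExt);
  assert(lrwLoad(Mem) == (uint64_t)(int64_t)(int32_t)AnyExt);

  // amocas.w: only the low 32 bits of the register participate, so any
  // extension of the upper bits is acceptable.
  assert((uint32_t)AnyExt == Mem);
  return 0;
}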
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index e591aa935c0b..6c9e529e4bfb 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1464,20 +1464,6 @@ static void doUnion(DemandedFields &A, DemandedFields B) {
A.MaskPolicy |= B.MaskPolicy;
}
-static bool isNonZeroAVL(const MachineOperand &MO,
- const MachineRegisterInfo &MRI) {
- if (MO.isReg()) {
- if (MO.getReg() == RISCV::X0)
- return true;
- if (MachineInstr *MI = MRI.getVRegDef(MO.getReg());
- MI && isNonZeroLoadImmediate(*MI))
- return true;
- return false;
- }
- assert(MO.isImm());
- return 0 != MO.getImm();
-}
-
// Return true if we can mutate PrevMI to match MI without changing any of the
// fields which would be observed.
static bool canMutatePriorConfig(const MachineInstr &PrevMI,
@@ -1491,21 +1477,26 @@ static bool canMutatePriorConfig(const MachineInstr &PrevMI,
if (Used.VLAny)
return false;
- // We don't bother to handle the equally zero case here as it's largely
- // uninteresting.
if (Used.VLZeroness) {
if (isVLPreservingConfig(PrevMI))
return false;
- if (!isNonZeroAVL(MI.getOperand(1), MRI) ||
- !isNonZeroAVL(PrevMI.getOperand(1), MRI))
+ if (!getInfoForVSETVLI(PrevMI).hasEquallyZeroAVL(getInfoForVSETVLI(MI),
+ MRI))
return false;
}
- // TODO: Track whether the register is defined between
- // PrevMI and MI.
- if (MI.getOperand(1).isReg() &&
- RISCV::X0 != MI.getOperand(1).getReg())
- return false;
+ auto &AVL = MI.getOperand(1);
+ auto &PrevAVL = PrevMI.getOperand(1);
+ assert(MRI.isSSA());
+
+ // If the AVL is a register, we need to make sure MI's AVL dominates PrevMI.
+ // For now just check that PrevMI uses the same virtual register.
+ if (AVL.isReg() && AVL.getReg() != RISCV::X0) {
+ if (AVL.getReg().isPhysical())
+ return false;
+ if (!PrevAVL.isReg() || PrevAVL.getReg() != AVL.getReg())
+ return false;
+ }
}
if (!PrevMI.getOperand(2).isImm() || !MI.getOperand(2).isImm())
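The rewritten check drops the nonzero-AVL heuristic in favor of an equality-based one: a register AVL other than x0 is tolerated only when it is a virtual register that PrevMI names as well, which sidesteps having to track redefinitions between the two vsetvlis. A standalone C++ restatement with illustrative operand types, not MachineOperand:

#include <cassert>

struct Operand { bool IsReg; bool IsPhysical; int Reg; };
constexpr int X0 = 0;

bool avlCompatible(const Operand &AVL, const Operand &PrevAVL) {
  if (!AVL.IsReg || AVL.Reg == X0)
    return true;               // immediate or x0 AVL: nothing more to check
  if (AVL.IsPhysical)
    return false;              // a physical register may be redefined
  return PrevAVL.IsReg && PrevAVL.Reg == AVL.Reg; // same virtual register
}

int main() {
  Operand V1{true, false, 101}, V2{true, false, 102}, Imm{false, false, 0};
  assert(avlCompatible(V1, V1));   // same vreg: PrevMI may be mutated
  assert(!avlCompatible(V1, V2));  // different vreg: bail out
  assert(avlCompatible(Imm, V2));  // immediate AVL: checked separately
  return 0;
}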
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 351f48c1708e..9813c7a70dfc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -414,15 +414,16 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- if (RISCV::GPRPF64RegClass.contains(DstReg, SrcReg)) {
- // Emit an ADDI for both parts of GPRPF64.
+ if (RISCV::GPRPairRegClass.contains(DstReg, SrcReg)) {
+ // Emit an ADDI for both parts of GPRPair.
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
- TRI->getSubReg(DstReg, RISCV::sub_32))
- .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32), getKillRegState(KillSrc))
+ TRI->getSubReg(DstReg, RISCV::sub_gpr_even))
+ .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_even),
+ getKillRegState(KillSrc))
.addImm(0);
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI),
- TRI->getSubReg(DstReg, RISCV::sub_32_hi))
- .addReg(TRI->getSubReg(SrcReg, RISCV::sub_32_hi),
+ TRI->getSubReg(DstReg, RISCV::sub_gpr_odd))
+ .addReg(TRI->getSubReg(SrcReg, RISCV::sub_gpr_odd),
getKillRegState(KillSrc))
.addImm(0);
return;
@@ -607,7 +608,7 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
IsScalableVector = false;
- } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
+ } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxSD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
@@ -690,7 +691,7 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
IsScalableVector = false;
- } else if (RISCV::GPRPF64RegClass.hasSubClassEq(RC)) {
+ } else if (RISCV::GPRPairRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoRV32ZdinxLD;
IsScalableVector = false;
} else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 4d0567e41abc..44552c00c62e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -157,7 +157,16 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
-let Predicates = [HasStdExtA] in {
+defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>;
+defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>;
+
/// Pseudo AMOs
@@ -169,21 +178,6 @@ class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
let hasSideEffects = 0;
}
-let Size = 20 in
-def PseudoAtomicLoadNand32 : PseudoAMO;
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
-def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
-def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
-def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
-def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
-
class PseudoMaskedAMO
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$incr, GPR:$mask, ixlenimm:$ordering), []> {
@@ -224,6 +218,23 @@ class PseudoMaskedAMOMinMaxPat<Intrinsic intrin, Pseudo AMOInst>
(AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt,
timm:$ordering)>;
+let Predicates = [HasStdExtA] in {
+
+let Size = 20 in
+def PseudoAtomicLoadNand32 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(XLenVT (atomic_load_nand_32_monotonic GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(XLenVT (atomic_load_nand_32_acquire GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(XLenVT (atomic_load_nand_32_release GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(XLenVT (atomic_load_nand_32_acq_rel GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(XLenVT (atomic_load_nand_32_seq_cst GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand32 GPR:$addr, GPR:$incr, 7)>;
+
let Size = 28 in
def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i32,
@@ -256,6 +267,43 @@ let Size = 36 in
def PseudoMaskedAtomicLoadUMin32 : PseudoMaskedAMOUMinUMax;
def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i32,
PseudoMaskedAtomicLoadUMin32>;
+} // Predicates = [HasStdExtA]
+
+let Predicates = [HasStdExtA, IsRV64] in {
+
+let Size = 20 in
+def PseudoAtomicLoadNand64 : PseudoAMO;
+// Ordering constants must be kept in sync with the AtomicOrdering enum in
+// AtomicOrdering.h.
+def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>;
+def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>;
+def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>;
+def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>;
+def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)),
+ (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>;
+
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64,
+ PseudoMaskedAtomicSwap32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64,
+ PseudoMaskedAtomicLoadAdd32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64,
+ PseudoMaskedAtomicLoadSub32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64,
+ PseudoMaskedAtomicLoadNand32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64,
+ PseudoMaskedAtomicLoadMax32>;
+def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64,
+ PseudoMaskedAtomicLoadMin32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64,
+ PseudoMaskedAtomicLoadUMax32>;
+def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
+ PseudoMaskedAtomicLoadUMin32>;
+} // Predicates = [HasStdExtA, IsRV64]
+
/// Compare and exchange
@@ -285,9 +333,17 @@ multiclass PseudoCmpXchgPat<string Op, Pseudo CmpXchgInst,
(CmpXchgInst GPR:$addr, GPR:$cmp, GPR:$new, 7)>;
}
+let Predicates = [HasStdExtA, NoStdExtZacas] in {
def PseudoCmpXchg32 : PseudoCmpXchg;
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32>;
+}
+
+let Predicates = [HasStdExtA, NoStdExtZacas, IsRV64] in {
+def PseudoCmpXchg64 : PseudoCmpXchg;
+defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
+}
+let Predicates = [HasStdExtA] in {
def PseudoMaskedCmpXchg32
: Pseudo<(outs GPR:$res, GPR:$scratch),
(ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask,
@@ -303,60 +359,9 @@ def : Pat<(int_riscv_masked_cmpxchg_i32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>;
-
} // Predicates = [HasStdExtA]
-defm : AMOPat<"atomic_swap_64", "AMOSWAP_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_add_64", "AMOADD_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_and_64", "AMOAND_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_or_64", "AMOOR_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_xor_64", "AMOXOR_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_max_64", "AMOMAX_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64, [IsRV64]>;
-defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64, [IsRV64]>;
-
let Predicates = [HasStdExtA, IsRV64] in {
-
-/// 64-bit pseudo AMOs
-
-let Size = 20 in
-def PseudoAtomicLoadNand64 : PseudoAMO;
-// Ordering constants must be kept in sync with the AtomicOrdering enum in
-// AtomicOrdering.h.
-def : Pat<(i64 (atomic_load_nand_64_monotonic GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 2)>;
-def : Pat<(i64 (atomic_load_nand_64_acquire GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 4)>;
-def : Pat<(i64 (atomic_load_nand_64_release GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 5)>;
-def : Pat<(i64 (atomic_load_nand_64_acq_rel GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 6)>;
-def : Pat<(i64 (atomic_load_nand_64_seq_cst GPR:$addr, GPR:$incr)),
- (PseudoAtomicLoadNand64 GPR:$addr, GPR:$incr, 7)>;
-
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_xchg_i64,
- PseudoMaskedAtomicSwap32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_add_i64,
- PseudoMaskedAtomicLoadAdd32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_sub_i64,
- PseudoMaskedAtomicLoadSub32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_nand_i64,
- PseudoMaskedAtomicLoadNand32>;
-def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_max_i64,
- PseudoMaskedAtomicLoadMax32>;
-def : PseudoMaskedAMOMinMaxPat<int_riscv_masked_atomicrmw_min_i64,
- PseudoMaskedAtomicLoadMin32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umax_i64,
- PseudoMaskedAtomicLoadUMax32>;
-def : PseudoMaskedAMOPat<int_riscv_masked_atomicrmw_umin_i64,
- PseudoMaskedAtomicLoadUMin32>;
-
-/// 64-bit compare and exchange
-
-def PseudoCmpXchg64 : PseudoCmpXchg;
-defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64, i64>;
-
def : Pat<(int_riscv_masked_cmpxchg_i64
GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering),
(PseudoMaskedCmpXchg32
@@ -408,6 +413,7 @@ defm : AMOPat2<"atomic_load_min_32", "AMOMIN_W", i32>;
defm : AMOPat2<"atomic_load_umax_32", "AMOMAXU_W", i32>;
defm : AMOPat2<"atomic_load_umin_32", "AMOMINU_W", i32>;
+let Predicates = [HasStdExtA, IsRV64] in
defm : PseudoCmpXchgPat<"atomic_cmp_swap_32", PseudoCmpXchg32, i32>;
let Predicates = [HasAtomicLdSt] in {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
index 418421b2a556..fec43d814098 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoD.td
@@ -33,8 +33,8 @@ def AddrRegImmINX : ComplexPattern<iPTR, 2, "SelectAddrRegImmINX">;
// Zdinx
-def GPRPF64AsFPR : AsmOperandClass {
- let Name = "GPRPF64AsFPR";
+def GPRPairAsFPR : AsmOperandClass {
+ let Name = "GPRPairAsFPR";
let ParserMethod = "parseGPRAsFPR";
let PredicateMethod = "isGPRAsFPR";
let RenderMethod = "addRegOperands";
@@ -52,8 +52,8 @@ def FPR64INX : RegisterOperand<GPR> {
let DecoderMethod = "DecodeGPRRegisterClass";
}
-def FPR64IN32X : RegisterOperand<GPRPF64> {
- let ParserMatchClass = GPRPF64AsFPR;
+def FPR64IN32X : RegisterOperand<GPRPair> {
+ let ParserMatchClass = GPRPairAsFPR;
}
def DExt : ExtInfo<"", "", [HasStdExtD], f64, FPR64, FPR32, FPR64, ?>;
@@ -515,15 +515,15 @@ def PseudoFROUND_D_IN32X : PseudoFROUND<FPR64IN32X, f64>;
/// Loads
let isCall = 0, mayLoad = 1, mayStore = 0, Size = 8, isCodeGenOnly = 1 in
-def PseudoRV32ZdinxLD : Pseudo<(outs GPRPF64:$dst), (ins GPR:$rs1, simm12:$imm12), []>;
+def PseudoRV32ZdinxLD : Pseudo<(outs GPRPair:$dst), (ins GPR:$rs1, simm12:$imm12), []>;
def : Pat<(f64 (load (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12))),
(PseudoRV32ZdinxLD GPR:$rs1, simm12:$imm12)>;
/// Stores
let isCall = 0, mayLoad = 0, mayStore = 1, Size = 8, isCodeGenOnly = 1 in
-def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPF64:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>;
-def : Pat<(store (f64 GPRPF64:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)),
- (PseudoRV32ZdinxSD GPRPF64:$rs2, GPR:$rs1, simm12:$imm12)>;
+def PseudoRV32ZdinxSD : Pseudo<(outs), (ins GPRPair:$rs2, GPRNoX0:$rs1, simm12:$imm12), []>;
+def : Pat<(store (f64 GPRPair:$rs2), (AddrRegImmINX (XLenVT GPR:$rs1), simm12:$imm12)),
+ (PseudoRV32ZdinxSD GPRPair:$rs2, GPR:$rs1, simm12:$imm12)>;
/// Pseudo-instructions needed for the soft-float ABI with RV32D
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 4f87c36506e5..8ebd8b89c119 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -877,6 +877,23 @@ multiclass VPatMultiplyAddSDNode_VV_VX<SDNode op, string instruction_name> {
}
}
+multiclass VPatAVGADD_VV_VX_RM<SDNode vop, int vxrm> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2)),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
+ vxrm, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
+ vxrm, vti.AVL, vti.Log2SEW, TA_MA)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -1132,20 +1149,8 @@ defm : VPatBinarySDNode_VV_VX<ssubsat, "PseudoVSSUB">;
defm : VPatBinarySDNode_VV_VX<usubsat, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
-foreach vti = AllIntegerVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2)),
- (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs2,
- 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
- def : Pat<(avgflooru (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat (XLenVT GPR:$rs2)))),
- (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX)
- (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2,
- 0b10, vti.AVL, vti.Log2SEW, TA_MA)>;
- }
-}
+defm : VPatAVGADD_VV_VX_RM<avgflooru, 0b10>;
+defm : VPatAVGADD_VV_VX_RM<avgceilu, 0b00>;
// 15. Vector Mask Instructions
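Both averaging ops lower onto the same vaaddu pseudo; only the vxrm immediate differs, 0b10 (round down) for avgflooru and 0b00 (round to nearest up) for avgceilu. A standalone C++ sketch of the two operations in their overflow-safe forms; plain functions, not the lowering itself:

#include <cassert>
#include <cstdint>

// floor((a + b) / 2) without overflowing the intermediate sum.
uint32_t avgFloorU(uint32_t a, uint32_t b) {
  return (a & b) + ((a ^ b) >> 1);
}

// ceil((a + b) / 2) without overflowing the intermediate sum.
uint32_t avgCeilU(uint32_t a, uint32_t b) {
  return (a | b) - ((a ^ b) >> 1);
}

int main() {
  assert(avgFloorU(3, 4) == 3 && avgCeilU(3, 4) == 4);
  assert(avgFloorU(UINT32_MAX, UINT32_MAX) == UINT32_MAX); // no overflow
  assert(avgCeilU(UINT32_MAX, UINT32_MAX) == UINT32_MAX);
  return 0;
}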
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index d60ff4b5fab0..1deb9a709463 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -112,6 +112,7 @@ def riscv_cttz_vl : SDNode<"RISCVISD::CTTZ_VL", SDT_RISCVIntUnOp_VL>
def riscv_ctpop_vl : SDNode<"RISCVISD::CTPOP_VL", SDT_RISCVIntUnOp_VL>;
def riscv_avgflooru_vl : SDNode<"RISCVISD::AVGFLOORU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
+def riscv_avgceilu_vl : SDNode<"RISCVISD::AVGCEILU_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_saddsat_vl : SDNode<"RISCVISD::SADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_uaddsat_vl : SDNode<"RISCVISD::UADDSAT_VL", SDT_RISCVIntBinOp_VL, [SDNPCommutative]>;
def riscv_ssubsat_vl : SDNode<"RISCVISD::SSUBSAT_VL", SDT_RISCVIntBinOp_VL>;
@@ -2031,6 +2032,25 @@ multiclass VPatSlide1VL_VF<SDNode vop, string instruction_name> {
}
}
+multiclass VPatAVGADDVL_VV_VX_RM<SDNode vop, int vxrm> {
+ foreach vti = AllIntegerVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
+ (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
+ vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
+ (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
+ vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
+ (vti.Mask V0), vxrm, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Patterns.
//===----------------------------------------------------------------------===//
@@ -2308,22 +2328,8 @@ defm : VPatBinaryVL_VV_VX<riscv_ssubsat_vl, "PseudoVSSUB">;
defm : VPatBinaryVL_VV_VX<riscv_usubsat_vl, "PseudoVSSUBU">;
// 12.2. Vector Single-Width Averaging Add and Subtract
-foreach vti = AllIntegerVectors in {
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VV_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_avgflooru_vl (vti.Vector vti.RegClass:$rs1),
- (vti.Vector (SplatPat (XLenVT GPR:$rs2))),
- vti.RegClass:$merge, (vti.Mask V0), VLOpFrag),
- (!cast<Instruction>("PseudoVAADDU_VX_"#vti.LMul.MX#"_MASK")
- vti.RegClass:$merge, vti.RegClass:$rs1, GPR:$rs2,
- (vti.Mask V0), 0b10, GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- }
-}
+defm : VPatAVGADDVL_VV_VX_RM<riscv_avgflooru_vl, 0b10>;
+defm : VPatAVGADDVL_VV_VX_RM<riscv_avgceilu_vl, 0b00>;
// 12.5. Vector Narrowing Fixed-Point Clip Instructions
class VPatTruncSatClipMaxMinBase<string inst,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
index a09f5715b24f..ffcdd0010749 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZa.td
@@ -17,15 +17,107 @@
// Zacas (Atomic Compare-and-Swap)
//===----------------------------------------------------------------------===//
+def GPRPairRV32Operand : AsmOperandClass {
+ let Name = "GPRPairRV32";
+ let ParserMethod = "parseGPRPair<false>";
+ let PredicateMethod = "isGPRPair";
+ let RenderMethod = "addRegOperands";
+}
+
+def GPRPairRV64Operand : AsmOperandClass {
+ let Name = "GPRPairRV64";
+ let ParserMethod = "parseGPRPair<true>";
+ let PredicateMethod = "isGPRPair";
+ let RenderMethod = "addRegOperands";
+}
+
+def GPRPairRV32 : RegisterOperand<GPRPair> {
+ let ParserMatchClass = GPRPairRV32Operand;
+}
+
+def GPRPairRV64 : RegisterOperand<GPRPair> {
+ let ParserMatchClass = GPRPairRV64Operand;
+}
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 1, Constraints = "$rd = $rd_wb" in
+class AMO_cas<bits<5> funct5, bit aq, bit rl, bits<3> funct3, string opcodestr,
+ DAGOperand RC>
+ : RVInstRAtomic<funct5, aq, rl, funct3, OPC_AMO,
+ (outs RC:$rd_wb), (ins RC:$rd, GPRMemZeroOffset:$rs1, RC:$rs2),
+ opcodestr, "$rd, $rs2, $rs1">;
+
+multiclass AMO_cas_aq_rl<bits<5> funct5, bits<3> funct3, string opcodestr,
+ DAGOperand RC> {
+ def "" : AMO_cas<funct5, 0, 0, funct3, opcodestr, RC>;
+ def _AQ : AMO_cas<funct5, 1, 0, funct3, opcodestr # ".aq", RC>;
+ def _RL : AMO_cas<funct5, 0, 1, funct3, opcodestr # ".rl", RC>;
+ def _AQ_RL : AMO_cas<funct5, 1, 1, funct3, opcodestr # ".aqrl", RC>;
+}
+
let Predicates = [HasStdExtZacas] in {
-defm AMOCAS_W : AMO_rr_aq_rl<0b00101, 0b010, "amocas.w">;
-defm AMOCAS_D : AMO_rr_aq_rl<0b00101, 0b011, "amocas.d">;
+defm AMOCAS_W : AMO_cas_aq_rl<0b00101, 0b010, "amocas.w", GPR>;
} // Predicates = [HasStdExtZacas]
+let Predicates = [HasStdExtZacas, IsRV32], DecoderNamespace = "RV32Zacas" in {
+defm AMOCAS_D_RV32 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPRPairRV32>;
+} // Predicates = [HasStdExtZacas, IsRV32]
+
let Predicates = [HasStdExtZacas, IsRV64] in {
-defm AMOCAS_Q : AMO_rr_aq_rl<0b00101, 0b100, "amocas.q">;
+defm AMOCAS_D_RV64 : AMO_cas_aq_rl<0b00101, 0b011, "amocas.d", GPR>;
+defm AMOCAS_Q : AMO_cas_aq_rl<0b00101, 0b100, "amocas.q", GPRPairRV64>;
} // Predicates = [HasStdExtZacas, IsRV64]
+multiclass AMOCASPat<string AtomicOp, string BaseInst, ValueType vt = XLenVT,
+ list<Predicate> ExtraPreds = []> {
+ let Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds) in {
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst#"_AQ") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst#"_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst#"_AQ_RL") GPR:$cmp, GPR:$addr, GPR:$new)>;
+ } // Predicates = !listconcat([HasStdExtZacas, NotHasStdExtZtso], ExtraPreds)
+ let Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds) in {
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_monotonic") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acquire") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_release") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_acq_rel") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ def : Pat<(!cast<PatFrag>(AtomicOp#"_seq_cst") (vt GPR:$addr),
+ (vt GPR:$cmp),
+ (vt GPR:$new)),
+ (!cast<RVInst>(BaseInst) GPR:$cmp, GPR:$addr, GPR:$new)>;
+ } // Predicates = !listconcat([HasStdExtZacas, HasStdExtZtso], ExtraPreds)
+}
+
+defm : AMOCASPat<"atomic_cmp_swap_32", "AMOCAS_W">;
+defm : AMOCASPat<"atomic_cmp_swap_64", "AMOCAS_D_RV64", i64, [IsRV64]>;
+
//===----------------------------------------------------------------------===//
// Zawrs (Wait-on-Reservation-Set)
//===----------------------------------------------------------------------===//
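The two pattern sets in AMOCASPat differ only in how the ordering selects a suffix: without Ztso, acquire/release orderings pick the .aq/.rl/.aqrl encodings (seq_cst also maps to .aqrl), while under Ztso every ordering selects the plain encoding, since the memory model is already totally ordered. A standalone C++ restatement; the enum is illustrative, not LLVM's AtomicOrdering:

#include <cassert>
#include <cstring>

enum Ordering { Monotonic, Acquire, Release, AcqRel, SeqCst };

const char *amocasSuffix(Ordering O, bool HasZtso) {
  if (HasZtso)
    return "";               // TSO already orders all accesses
  switch (O) {
  case Monotonic: return "";
  case Acquire:   return ".aq";
  case Release:   return ".rl";
  case AcqRel:
  case SeqCst:    return ".aqrl";
  }
  return "";
}

int main() {
  assert(strcmp(amocasSuffix(SeqCst, false), ".aqrl") == 0);
  assert(strcmp(amocasSuffix(Acquire, true), "") == 0);
  return 0;
}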
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index a59d058382fe..5a4d8c4cfece 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -63,7 +63,10 @@ def sub_vrm1_5 : ComposedSubRegIndex<sub_vrm2_2, sub_vrm1_1>;
def sub_vrm1_6 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_0>;
def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>;
-def sub_32_hi : SubRegIndex<32, 32>;
+// GPR sizes change with HwMode.
+// FIXME: Support HwMode in SubRegIndex?
+def sub_gpr_even : SubRegIndex<-1>;
+def sub_gpr_odd : SubRegIndex<-1, -1>;
} // Namespace = "RISCV"
// Integer registers
@@ -118,6 +121,8 @@ def XLenVT : ValueTypeByHwMode<[RV32, RV64],
// Allow f64 in GPR for ZDINX on RV64.
def XLenFVT : ValueTypeByHwMode<[RV64],
[f64]>;
+def XLenPairFVT : ValueTypeByHwMode<[RV32],
+ [f64]>;
def XLenRI : RegInfoByHwMode<
[RV32, RV64],
[RegInfo<32,32,32>, RegInfo<64,64,64>]>;
@@ -546,33 +551,37 @@ def DUMMY_REG_PAIR_WITH_X0 : RISCVReg<0, "0">;
def GPRAll : GPRRegisterClass<(add GPR, DUMMY_REG_PAIR_WITH_X0)>;
let RegAltNameIndices = [ABIRegAltName] in {
- def X0_PD : RISCVRegWithSubRegs<0, X0.AsmName,
- [X0, DUMMY_REG_PAIR_WITH_X0],
- X0.AltNames> {
- let SubRegIndices = [sub_32, sub_32_hi];
+ def X0_Pair : RISCVRegWithSubRegs<0, X0.AsmName,
+ [X0, DUMMY_REG_PAIR_WITH_X0],
+ X0.AltNames> {
+ let SubRegIndices = [sub_gpr_even, sub_gpr_odd];
let CoveredBySubRegs = 1;
}
foreach I = 1-15 in {
defvar Index = !shl(I, 1);
+ defvar IndexP1 = !add(Index, 1);
defvar Reg = !cast<Register>("X"#Index);
- defvar RegP1 = !cast<Register>("X"#!add(Index,1));
- def X#Index#_PD : RISCVRegWithSubRegs<Index, Reg.AsmName,
- [Reg, RegP1],
- Reg.AltNames> {
- let SubRegIndices = [sub_32, sub_32_hi];
+ defvar RegP1 = !cast<Register>("X"#IndexP1);
+ def "X" # Index #"_X" # IndexP1 : RISCVRegWithSubRegs<Index,
+ Reg.AsmName,
+ [Reg, RegP1],
+ Reg.AltNames> {
+ let SubRegIndices = [sub_gpr_even, sub_gpr_odd];
let CoveredBySubRegs = 1;
}
}
}
-let RegInfos = RegInfoByHwMode<[RV64], [RegInfo<64, 64, 64>]> in
-def GPRPF64 : RegisterClass<"RISCV", [f64], 64, (add
- X10_PD, X12_PD, X14_PD, X16_PD,
- X6_PD,
- X28_PD, X30_PD,
- X8_PD,
- X18_PD, X20_PD, X22_PD, X24_PD, X26_PD,
- X0_PD, X2_PD, X4_PD
+let RegInfos = RegInfoByHwMode<[RV32, RV64],
+ [RegInfo<64, 64, 64>, RegInfo<128, 128, 128>]>,
+ DecoderMethod = "DecodeGPRPairRegisterClass" in
+def GPRPair : RegisterClass<"RISCV", [XLenPairFVT], 64, (add
+ X10_X11, X12_X13, X14_X15, X16_X17,
+ X6_X7,
+ X28_X29, X30_X31,
+ X8_X9,
+ X18_X19, X20_X21, X22_X23, X24_X25, X26_X27,
+ X0_Pair, X2_X3, X4_X5
)>;
// The register class is added for inline assembly for vector mask types.
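
The pair definitions above follow a fixed even/odd scheme: each GPRPair member X<2i>_X<2i+1> covers an even register via sub_gpr_even and the next odd register via sub_gpr_odd, with X0 paired against the dummy register. A tiny sketch that just replays the foreach to show the generated names:

```cpp
#include <cstdio>

// Mirrors "foreach I = 1-15" above: Index = I << 1 gives the even half of
// each pair register, Index + 1 the odd half.
int main() {
  std::printf("X0_Pair\n"); // X0 + DUMMY_REG_PAIR_WITH_X0
  for (int I = 1; I <= 15; ++I) {
    int Even = I << 1;
    std::printf("X%d_X%d\n", Even, Even + 1);
  }
}
```
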
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 320f91c76057..815eca1240d8 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1649,7 +1649,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
}
}
if (Node->getValueType(0) == MVT::i128) {
- const APInt &Val = cast<ConstantSDNode>(Node)->getAPIntValue();
+ const APInt &Val = Node->getAsAPIntVal();
SystemZVectorConstantInfo VCI(Val);
if (VCI.isVectorConstantLegal(*Subtarget)) {
loadVectorConstant(VCI, Node);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2450c6801a66..7d387c7b9f2f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -340,6 +340,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
+ // Also expand 256 bit shifts if i128 is a legal type.
+ if (isTypeLegal(MVT::i128)) {
+ setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand);
+ }
+
// Handle bitcast from fp128 to i128.
if (!isTypeLegal(MVT::i128))
setOperationAction(ISD::BITCAST, MVT::i128, Custom);
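
For context, the ISD::*_PARTS nodes describe a shift of a double-width value held as {Lo, Hi} halves; marking them Expand at MVT::i128, as above, lets legalization break a 256-bit shift down into plain i128 operations. A minimal sketch of the SHL case, shown with 64-bit halves since the shape is width-independent:

```cpp
#include <cstdint>

// Hedged sketch of what expanding SHL_PARTS amounts to: shift a 2N-bit
// value {Lo, Hi} left by Amt (0 <= Amt < 2N) using only N-bit operations.
void shl_parts(uint64_t Lo, uint64_t Hi, unsigned Amt,
               uint64_t &OutLo, uint64_t &OutHi) {
  const unsigned N = 64;
  if (Amt == 0) {       // avoid the undefined shift by N below
    OutLo = Lo;
    OutHi = Hi;
  } else if (Amt < N) { // bits move from Lo into Hi
    OutLo = Lo << Amt;
    OutHi = (Hi << Amt) | (Lo >> (N - Amt));
  } else {              // N <= Amt < 2N: Lo supplies all of Hi
    OutLo = 0;
    OutHi = Lo << (Amt - N);
  }
}
```
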
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 15dc44a04395..7f0140a5e8c6 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -839,9 +839,9 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
unsigned Reg;
- if (Attrs.hasParamAttr(I, Attribute::SExt))
+ if (Call->paramHasAttr(I, Attribute::SExt))
Reg = getRegForSignedValue(V);
- else if (Attrs.hasParamAttr(I, Attribute::ZExt))
+ else if (Call->paramHasAttr(I, Attribute::ZExt))
Reg = getRegForUnsignedValue(V);
else
Reg = getRegForValue(V);
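
The switch to Call->paramHasAttr is not cosmetic: unlike querying only the call-site attribute list, CallBase::paramHasAttr also consults the callee's declared parameter attributes when the callee is known, so sext/zext annotations on the declaration are honored. A simplified sketch of that lookup order (the real helper covers more cases):

```cpp
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"

// Approximation of the semantics relied on above: prefer the call-site
// attribute, then fall back to the called function's parameter attribute.
static bool paramHasAttrLike(const llvm::CallBase *Call, unsigned ArgNo,
                             llvm::Attribute::AttrKind Kind) {
  if (Call->getAttributes().hasParamAttr(ArgNo, Kind))
    return true;
  if (const llvm::Function *F = Call->getCalledFunction())
    return F->getAttributes().hasParamAttr(ArgNo, Kind);
  return false;
}
```
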
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 304b998e1f26..e006dd877360 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -148,21 +148,25 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
case X86::AND16ri8:
case X86::AND16rm:
case X86::AND16rr:
+ case X86::AND16rr_REV:
case X86::AND32i32:
case X86::AND32ri:
case X86::AND32ri8:
case X86::AND32rm:
case X86::AND32rr:
+ case X86::AND32rr_REV:
case X86::AND64i32:
case X86::AND64ri32:
case X86::AND64ri8:
case X86::AND64rm:
case X86::AND64rr:
+ case X86::AND64rr_REV:
case X86::AND8i8:
case X86::AND8ri:
case X86::AND8ri8:
case X86::AND8rm:
case X86::AND8rr:
+ case X86::AND8rr_REV:
return FirstMacroFusionInstKind::And;
// CMP
case X86::CMP16i16:
@@ -171,24 +175,28 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
case X86::CMP16ri8:
case X86::CMP16rm:
case X86::CMP16rr:
+ case X86::CMP16rr_REV:
case X86::CMP32i32:
case X86::CMP32mr:
case X86::CMP32ri:
case X86::CMP32ri8:
case X86::CMP32rm:
case X86::CMP32rr:
+ case X86::CMP32rr_REV:
case X86::CMP64i32:
case X86::CMP64mr:
case X86::CMP64ri32:
case X86::CMP64ri8:
case X86::CMP64rm:
case X86::CMP64rr:
+ case X86::CMP64rr_REV:
case X86::CMP8i8:
case X86::CMP8mr:
case X86::CMP8ri:
case X86::CMP8ri8:
case X86::CMP8rm:
case X86::CMP8rr:
+ case X86::CMP8rr_REV:
return FirstMacroFusionInstKind::Cmp;
// ADD
case X86::ADD16i16:
@@ -196,42 +204,50 @@ classifyFirstOpcodeInMacroFusion(unsigned Opcode) {
case X86::ADD16ri8:
case X86::ADD16rm:
case X86::ADD16rr:
+ case X86::ADD16rr_REV:
case X86::ADD32i32:
case X86::ADD32ri:
case X86::ADD32ri8:
case X86::ADD32rm:
case X86::ADD32rr:
+ case X86::ADD32rr_REV:
case X86::ADD64i32:
case X86::ADD64ri32:
case X86::ADD64ri8:
case X86::ADD64rm:
case X86::ADD64rr:
+ case X86::ADD64rr_REV:
case X86::ADD8i8:
case X86::ADD8ri:
case X86::ADD8ri8:
case X86::ADD8rm:
case X86::ADD8rr:
+ case X86::ADD8rr_REV:
// SUB
case X86::SUB16i16:
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB16rm:
case X86::SUB16rr:
+ case X86::SUB16rr_REV:
case X86::SUB32i32:
case X86::SUB32ri:
case X86::SUB32ri8:
case X86::SUB32rm:
case X86::SUB32rr:
+ case X86::SUB32rr_REV:
case X86::SUB64i32:
case X86::SUB64ri32:
case X86::SUB64ri8:
case X86::SUB64rm:
case X86::SUB64rr:
+ case X86::SUB64rr_REV:
case X86::SUB8i8:
case X86::SUB8ri:
case X86::SUB8ri8:
case X86::SUB8rm:
case X86::SUB8rr:
+ case X86::SUB8rr_REV:
return FirstMacroFusionInstKind::AddSub;
// INC
case X86::INC16r:
diff --git a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
index aad839b83ee1..b13bf361ab79 100644
--- a/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
+++ b/llvm/lib/Target/X86/X86FlagsCopyLowering.cpp
@@ -173,6 +173,7 @@ static FlagArithMnemonic getMnemonicFromOpcode(unsigned Opcode) {
#define LLVM_EXPAND_ADC_SBB_INSTR(MNEMONIC) \
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr) \
+ LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rr_REV) \
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, rm) \
LLVM_EXPAND_INSTR_SIZES(MNEMONIC, mr) \
case X86::MNEMONIC##8ri: \
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5a28240ea9e2..700ab797b2f6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -2444,6 +2444,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
ISD::SRL,
ISD::OR,
ISD::AND,
+ ISD::BITREVERSE,
ISD::ADD,
ISD::FADD,
ISD::FSUB,
@@ -4821,8 +4822,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
APInt UndefSrcElts(NumSrcElts, 0);
SmallVector<APInt, 64> SrcEltBits;
- auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
- SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
+ const APInt &C = Op.getOperand(0).getConstantOperandAPInt(0);
+ SrcEltBits.push_back(C.zextOrTrunc(SrcEltSizeInBits));
SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
return CastBitData(UndefSrcElts, SrcEltBits);
}
@@ -17223,6 +17224,7 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
"Cannot lower 512-bit vectors w/o basic ISA!");
int NumElts = Mask.size();
+ int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
// Try to recognize shuffles that are just padding a subvector with zeros.
int SubvecElts = 0;
@@ -17288,17 +17290,18 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask,
Offset += NumElts; // Increment for next iteration.
}
- // If we're broadcasting a SETCC result, try to broadcast the ops instead.
+ // If we're performing a unary shuffle on a SETCC result, try to shuffle the
+ // ops instead.
// TODO: What other unary shuffles would benefit from this?
- if (isBroadcastShuffleMask(Mask) && V1.getOpcode() == ISD::SETCC &&
- V1->hasOneUse()) {
+ if (NumV2Elements == 0 && V1.getOpcode() == ISD::SETCC && V1->hasOneUse()) {
SDValue Op0 = V1.getOperand(0);
SDValue Op1 = V1.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(V1.getOperand(2))->get();
EVT OpVT = Op0.getValueType();
- return DAG.getSetCC(
- DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
- DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
+ if (OpVT.getScalarSizeInBits() >= 32 || isBroadcastShuffleMask(Mask))
+ return DAG.getSetCC(
+ DL, VT, DAG.getVectorShuffle(OpVT, DL, Op0, DAG.getUNDEF(OpVT), Mask),
+ DAG.getVectorShuffle(OpVT, DL, Op1, DAG.getUNDEF(OpVT), Mask), CC);
}
MVT ExtVT;
@@ -22551,7 +22554,7 @@ static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
// FIXME: Do this for non-constant compares for constant on LHS?
if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
Op0.hasOneUse() && // Hacky way to not break CSE opportunities with sub.
- cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 &&
+ Op1->getAsAPIntVal().getActiveBits() <= 32 &&
DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
CmpVT = MVT::i32;
Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
@@ -47029,8 +47032,8 @@ static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
SDValue N00 = N0.getOperand(0);
SDValue N01 = N0.getOperand(1);
- APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
- APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
+ APInt ShlConst = N01->getAsAPIntVal();
+ APInt SarConst = N1->getAsAPIntVal();
EVT CVT = N1.getValueType();
if (SarConst.isNegative())
@@ -51835,6 +51838,33 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
return combineFneg(N, DAG, DCI, Subtarget);
}
+static SDValue combineBITREVERSE(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Convert a (iX bitreverse(bitcast(vXi1 X))) -> (iX bitcast(shuffle(X)))
+ if (VT.isInteger() && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
+ SDValue Src = N0.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isVector() && SrcVT.getScalarType() == MVT::i1 &&
+ (DCI.isBeforeLegalize() ||
+ DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) &&
+ Subtarget.hasSSSE3()) {
+ unsigned NumElts = SrcVT.getVectorNumElements();
+ SmallVector<int, 32> ReverseMask(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I)
+ ReverseMask[I] = (NumElts - 1) - I;
+ SDValue Rev =
+ DAG.getVectorShuffle(SrcVT, SDLoc(N), Src, Src, ReverseMask);
+ return DAG.getBitcast(VT, Rev);
+ }
+ }
+
+ return SDValue();
+}
+
static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -56124,6 +56154,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
+ case ISD::BITREVERSE: return combineBITREVERSE(N, DAG, DCI, Subtarget);
case X86ISD::BEXTR:
case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget);
case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
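
The new combine rests on a small identity: bit-reversing the integer produced by bitcasting a vXi1 mask equals bitcasting the lane-reversed mask, and the lane reversal is one SSSE3 shuffle. A self-checking sketch at eight lanes:

```cpp
#include <cassert>
#include <cstdint>

// Lane I maps to bit I in an i1-vector bitcast, so reversing the bits of
// the packed integer is the same as packing the lanes in reverse order.
static uint8_t pack(const bool L[8]) {
  uint8_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= uint8_t(L[I]) << I;
  return R;
}

static uint8_t bitrev8(uint8_t V) {
  uint8_t R = 0;
  for (int I = 0; I < 8; ++I)
    R |= ((V >> I) & 1) << (7 - I);
  return R;
}

int main() {
  bool L[8] = {true, false, false, true, true, true, false, false};
  bool Rev[8];
  for (int I = 0; I < 8; ++I)
    Rev[I] = L[7 - I];
  assert(bitrev8(pack(L)) == pack(Rev));
}
```
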
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 5cfa95e085e3..76b0fe5f5cad 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -1107,43 +1107,85 @@ def : Pat<(store (X86adc_flag GR64:$src, (loadi64 addr:$dst), EFLAGS),
// Patterns for basic arithmetic ops with relocImm for the immediate field.
multiclass ArithBinOp_RF_relocImm_Pats<SDNode OpNodeFlag, SDNode OpNode> {
- def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
- (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
- def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
- (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
- def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
- (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
- def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
- (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+ let Predicates = [NoNDD] in {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
+ (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
+ (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
+ (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
+ (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst),
+ (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+ }
+ let Predicates = [HasNDD] in {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2),
+ (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2),
+ (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2),
+ (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2),
+ (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>;
- def : Pat<(store (OpNode (load addr:$dst), relocImm8_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), relocImm16_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), relocImm32_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
- def : Pat<(store (OpNode (load addr:$dst), i64relocImmSExt32_su:$src), addr:$dst),
- (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+ def : Pat<(OpNode (load addr:$dst), relocImm8_su:$src),
+ (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(OpNode (load addr:$dst), relocImm16_su:$src),
+ (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(OpNode (load addr:$dst), relocImm32_su:$src),
+ (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(OpNode (load addr:$dst), i64relocImmSExt32_su:$src),
+ (!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>;
+ }
}
multiclass ArithBinOp_RFF_relocImm_Pats<SDNode OpNodeFlag> {
- def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
- def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
- def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
- def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
- (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+ let Predicates = [NoNDD] in {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"8ri") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"16ri") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"32ri") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"64ri32") GR64:$src1, i64relocImmSExt32_su:$src2)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
- def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst),
- (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"8mi") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"16mi") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"32mi") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(store (OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS), addr:$dst),
+ (!cast<Instruction>(NAME#"64mi32") addr:$dst, i64relocImmSExt32_su:$src)>;
+ }
+ let Predicates = [HasNDD] in {
+ def : Pat<(OpNodeFlag GR8:$src1, relocImm8_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"8ri_ND") GR8:$src1, relocImm8_su:$src2)>;
+ def : Pat<(OpNodeFlag GR16:$src1, relocImm16_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"16ri_ND") GR16:$src1, relocImm16_su:$src2)>;
+ def : Pat<(OpNodeFlag GR32:$src1, relocImm32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"32ri_ND") GR32:$src1, relocImm32_su:$src2)>;
+ def : Pat<(OpNodeFlag GR64:$src1, i64relocImmSExt32_su:$src2, EFLAGS),
+ (!cast<Instruction>(NAME#"64ri32_ND") GR64:$src1, i64relocImmSExt32_su:$src2)>;
+
+ def : Pat<(OpNodeFlag (load addr:$dst), relocImm8_su:$src, EFLAGS),
+ (!cast<Instruction>(NAME#"8mi_ND") addr:$dst, relocImm8_su:$src)>;
+ def : Pat<(OpNodeFlag (load addr:$dst), relocImm16_su:$src, EFLAGS),
+ (!cast<Instruction>(NAME#"16mi_ND") addr:$dst, relocImm16_su:$src)>;
+ def : Pat<(OpNodeFlag (load addr:$dst), relocImm32_su:$src, EFLAGS),
+ (!cast<Instruction>(NAME#"32mi_ND") addr:$dst, relocImm32_su:$src)>;
+ def : Pat<(OpNodeFlag (load addr:$dst), i64relocImmSExt32_su:$src, EFLAGS),
+ (!cast<Instruction>(NAME#"64mi32_ND") addr:$dst, i64relocImmSExt32_su:$src)>;
+ }
}
multiclass ArithBinOp_F_relocImm_Pats<SDNode OpNodeFlag> {
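
The NoNDD/HasNDD split duplicated throughout this file chooses between the classic destructive two-operand encodings and the APX "new data destination" forms, which write a separate destination so the first source register survives. A conceptual C++ sketch (illustrative only, not assembler syntax):

```cpp
#include <cstdint>

// Legacy form: the result clobbers the first source (ADD32rr-style).
static uint32_t add_legacy(uint32_t &Src1AndDst, uint32_t Src2) {
  Src1AndDst += Src2;
  return Src1AndDst;
}

// NDD form: a distinct destination, first source preserved
// (ADD32rr_ND-style), removing the need for a preserving copy.
static uint32_t add_ndd(uint32_t Src1, uint32_t Src2) {
  return Src1 + Src2;
}
```
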
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index c77c77ee4a3e..422391a6e02a 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -1550,13 +1550,24 @@ def : Pat<(X86add_flag_nocf GR64:$src1, 0x0000000080000000),
// AddedComplexity is needed to give priority over i64immSExt8 and i64immSExt32.
let AddedComplexity = 1 in {
-def : Pat<(and GR64:$src, i64immZExt32:$imm),
- (SUBREG_TO_REG
- (i64 0),
- (AND32ri
- (EXTRACT_SUBREG GR64:$src, sub_32bit),
- (i32 (GetLo32XForm imm:$imm))),
- sub_32bit)>;
+ let Predicates = [NoNDD] in {
+ def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+ }
+ let Predicates = [HasNDD] in {
+ def : Pat<(and GR64:$src, i64immZExt32:$imm),
+ (SUBREG_TO_REG
+ (i64 0),
+ (AND32ri_ND
+ (EXTRACT_SUBREG GR64:$src, sub_32bit),
+ (i32 (GetLo32XForm imm:$imm))),
+ sub_32bit)>;
+ }
} // AddedComplexity = 1
@@ -1762,10 +1773,18 @@ def : Pat<(X86xor_flag (i8 (trunc GR32:$src)),
// where the least significant bit is not 0. However, the probability of this
// happening is considered low enough that this is officially not a
// "real problem".
-def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
-def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
-def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+let Predicates = [NoNDD] in {
+ def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
+ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
+ def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
+ def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
+}
+let Predicates = [HasNDD] in {
+ def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr_ND GR8 :$src1, GR8 :$src1)>;
+ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr_ND GR16:$src1, GR16:$src1)>;
+ def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr_ND GR32:$src1, GR32:$src1)>;
+ def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr_ND GR64:$src1, GR64:$src1)>;
+}
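
Both pattern groups encode the same identity: a left shift by one is just adding a value to itself, and the ADD form is preferred here (the comment above explains that the only divergence is in how EFLAGS come out). A quick self-check:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // shl x, 1 == add x, x for unsigned values, including the wrapping cases.
  for (uint32_t X : {0u, 1u, 0x7fffffffu, 0xffffffffu})
    assert((X << 1) == X + X);
}
```
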
// Shift amount is implicitly masked.
multiclass MaskedShiftAmountPats<SDNode frag, string name> {
@@ -1937,75 +1956,179 @@ defm : one_bit_patterns<GR64, i64, BTR64rr, BTS64rr, BTC64rr, shiftMask64>;
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
-// add reg, reg
-def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(add GR64:$src1, GR64:$src2), (ADD64rr GR64:$src1, GR64:$src2)>;
+multiclass EFLAGSDefiningPats<string suffix, Predicate p> {
+ let Predicates = [p] in {
+ // add reg, reg
+ def : Pat<(add GR8 :$src1, GR8 :$src2), (!cast<Instruction>(ADD8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(add GR16:$src1, GR16:$src2), (!cast<Instruction>(ADD16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(add GR32:$src1, GR32:$src2), (!cast<Instruction>(ADD32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(add GR64:$src1, GR64:$src2), (!cast<Instruction>(ADD64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // add reg, mem
+ def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(ADD8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(ADD16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(ADD32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(ADD64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // add reg, imm
+ def : Pat<(add GR8 :$src1, imm:$src2), (!cast<Instruction>(ADD8ri#suffix) GR8:$src1 , imm:$src2)>;
+ def : Pat<(add GR16:$src1, imm:$src2), (!cast<Instruction>(ADD16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(add GR32:$src1, imm:$src2), (!cast<Instruction>(ADD32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(add GR64:$src1, i64immSExt32:$src2), (!cast<Instruction>(ADD64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
-// add reg, mem
-def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
- (ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
- (ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
- (ADD32rm GR32:$src1, addr:$src2)>;
-def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
- (ADD64rm GR64:$src1, addr:$src2)>;
+ // sub reg, reg
+ def : Pat<(sub GR8 :$src1, GR8 :$src2), (!cast<Instruction>(SUB8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(sub GR16:$src1, GR16:$src2), (!cast<Instruction>(SUB16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(sub GR32:$src1, GR32:$src2), (!cast<Instruction>(SUB32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(sub GR64:$src1, GR64:$src2), (!cast<Instruction>(SUB64rr#suffix) GR64:$src1, GR64:$src2)>;
-// add reg, imm
-def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
-def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(add GR64:$src1, i64immSExt32:$src2), (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
+ // sub reg, mem
+ def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(SUB8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(SUB16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(SUB32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(SUB64rm#suffix) GR64:$src1, addr:$src2)>;
-// sub reg, reg
-def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(sub GR64:$src1, GR64:$src2), (SUB64rr GR64:$src1, GR64:$src2)>;
+ // sub reg, imm
+ def : Pat<(sub GR8:$src1, imm:$src2),
+ (!cast<Instruction>(SUB8ri#suffix) GR8:$src1, imm:$src2)>;
+ def : Pat<(sub GR16:$src1, imm:$src2),
+ (!cast<Instruction>(SUB16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(sub GR32:$src1, imm:$src2),
+ (!cast<Instruction>(SUB32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
+ (!cast<Instruction>(SUB64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
-// sub reg, mem
-def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
- (SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
- (SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
- (SUB32rm GR32:$src1, addr:$src2)>;
-def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
- (SUB64rm GR64:$src1, addr:$src2)>;
+ // sub 0, reg
+ def : Pat<(X86sub_flag 0, GR8 :$src), (!cast<Instruction>(NEG8r#suffix) GR8 :$src)>;
+ def : Pat<(X86sub_flag 0, GR16:$src), (!cast<Instruction>(NEG16r#suffix) GR16:$src)>;
+ def : Pat<(X86sub_flag 0, GR32:$src), (!cast<Instruction>(NEG32r#suffix) GR32:$src)>;
+ def : Pat<(X86sub_flag 0, GR64:$src), (!cast<Instruction>(NEG64r#suffix) GR64:$src)>;
-// sub reg, imm
-def : Pat<(sub GR8:$src1, imm:$src2),
- (SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(sub GR16:$src1, imm:$src2),
- (SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(sub GR32:$src1, imm:$src2),
- (SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
- (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
+ // mul reg, reg
+ def : Pat<(mul GR16:$src1, GR16:$src2),
+ (!cast<Instruction>(IMUL16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(mul GR32:$src1, GR32:$src2),
+ (!cast<Instruction>(IMUL32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(mul GR64:$src1, GR64:$src2),
+ (!cast<Instruction>(IMUL64rr#suffix) GR64:$src1, GR64:$src2)>;
-// sub 0, reg
-def : Pat<(X86sub_flag 0, GR8 :$src), (NEG8r GR8 :$src)>;
-def : Pat<(X86sub_flag 0, GR16:$src), (NEG16r GR16:$src)>;
-def : Pat<(X86sub_flag 0, GR32:$src), (NEG32r GR32:$src)>;
-def : Pat<(X86sub_flag 0, GR64:$src), (NEG64r GR64:$src)>;
+ // mul reg, mem
+ def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(IMUL16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(IMUL32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(IMUL64rm#suffix) GR64:$src1, addr:$src2)>;
-// mul reg, reg
-def : Pat<(mul GR16:$src1, GR16:$src2),
- (IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(mul GR32:$src1, GR32:$src2),
- (IMUL32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(mul GR64:$src1, GR64:$src2),
- (IMUL64rr GR64:$src1, GR64:$src2)>;
+ // or reg/reg.
+ def : Pat<(or GR8 :$src1, GR8 :$src2), (!cast<Instruction>(OR8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(or GR16:$src1, GR16:$src2), (!cast<Instruction>(OR16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(or GR32:$src1, GR32:$src2), (!cast<Instruction>(OR32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(or GR64:$src1, GR64:$src2), (!cast<Instruction>(OR64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // or reg/mem
+ def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(OR8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(OR16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(OR32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(OR64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // or reg/imm
+ def : Pat<(or GR8:$src1 , imm:$src2), (!cast<Instruction>(OR8ri#suffix) GR8 :$src1, imm:$src2)>;
+ def : Pat<(or GR16:$src1, imm:$src2), (!cast<Instruction>(OR16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(or GR32:$src1, imm:$src2), (!cast<Instruction>(OR32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(or GR64:$src1, i64immSExt32:$src2),
+ (!cast<Instruction>(OR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
+
+ // xor reg/reg
+ def : Pat<(xor GR8 :$src1, GR8 :$src2), (!cast<Instruction>(XOR8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(xor GR16:$src1, GR16:$src2), (!cast<Instruction>(XOR16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(xor GR32:$src1, GR32:$src2), (!cast<Instruction>(XOR32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(xor GR64:$src1, GR64:$src2), (!cast<Instruction>(XOR64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // xor reg/mem
+ def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(XOR8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(XOR16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(XOR32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(XOR64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // xor reg/imm
+ def : Pat<(xor GR8:$src1, imm:$src2),
+ (!cast<Instruction>(XOR8ri#suffix) GR8:$src1, imm:$src2)>;
+ def : Pat<(xor GR16:$src1, imm:$src2),
+ (!cast<Instruction>(XOR16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(xor GR32:$src1, imm:$src2),
+ (!cast<Instruction>(XOR32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
+ (!cast<Instruction>(XOR64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
+
+ // and reg/reg
+ def : Pat<(and GR8 :$src1, GR8 :$src2), (!cast<Instruction>(AND8rr#suffix) GR8 :$src1, GR8 :$src2)>;
+ def : Pat<(and GR16:$src1, GR16:$src2), (!cast<Instruction>(AND16rr#suffix) GR16:$src1, GR16:$src2)>;
+ def : Pat<(and GR32:$src1, GR32:$src2), (!cast<Instruction>(AND32rr#suffix) GR32:$src1, GR32:$src2)>;
+ def : Pat<(and GR64:$src1, GR64:$src2), (!cast<Instruction>(AND64rr#suffix) GR64:$src1, GR64:$src2)>;
+
+ // and reg/mem
+ def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
+ (!cast<Instruction>(AND8rm#suffix) GR8:$src1, addr:$src2)>;
+ def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
+ (!cast<Instruction>(AND16rm#suffix) GR16:$src1, addr:$src2)>;
+ def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
+ (!cast<Instruction>(AND32rm#suffix) GR32:$src1, addr:$src2)>;
+ def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
+ (!cast<Instruction>(AND64rm#suffix) GR64:$src1, addr:$src2)>;
+
+ // and reg/imm
+ def : Pat<(and GR8:$src1, imm:$src2),
+ (!cast<Instruction>(AND8ri#suffix) GR8:$src1, imm:$src2)>;
+ def : Pat<(and GR16:$src1, imm:$src2),
+ (!cast<Instruction>(AND16ri#suffix) GR16:$src1, imm:$src2)>;
+ def : Pat<(and GR32:$src1, imm:$src2),
+ (!cast<Instruction>(AND32ri#suffix) GR32:$src1, imm:$src2)>;
+ def : Pat<(and GR64:$src1, i64immSExt32:$src2),
+ (!cast<Instruction>(AND64ri32#suffix) GR64:$src1, i64immSExt32:$src2)>;
+ }
-// mul reg, mem
-def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
- (IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
- (IMUL32rm GR32:$src1, addr:$src2)>;
-def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
- (IMUL64rm GR64:$src1, addr:$src2)>;
+ // Increment/Decrement reg.
+ // Do not use INC/DEC if they are slow.
+ let Predicates = [UseIncDec, p] in {
+ def : Pat<(add GR8:$src, 1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
+ def : Pat<(add GR16:$src, 1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
+ def : Pat<(add GR32:$src, 1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
+ def : Pat<(add GR64:$src, 1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
+ def : Pat<(add GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>;
+ def : Pat<(add GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>;
+ def : Pat<(add GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>;
+ def : Pat<(add GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>;
+
+ def : Pat<(X86add_flag_nocf GR8:$src, -1), (!cast<Instruction>(DEC8r#suffix) GR8:$src)>;
+ def : Pat<(X86add_flag_nocf GR16:$src, -1), (!cast<Instruction>(DEC16r#suffix) GR16:$src)>;
+ def : Pat<(X86add_flag_nocf GR32:$src, -1), (!cast<Instruction>(DEC32r#suffix) GR32:$src)>;
+ def : Pat<(X86add_flag_nocf GR64:$src, -1), (!cast<Instruction>(DEC64r#suffix) GR64:$src)>;
+ def : Pat<(X86sub_flag_nocf GR8:$src, -1), (!cast<Instruction>(INC8r#suffix) GR8:$src)>;
+ def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast<Instruction>(INC16r#suffix) GR16:$src)>;
+ def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast<Instruction>(INC32r#suffix) GR32:$src)>;
+ def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast<Instruction>(INC64r#suffix) GR64:$src)>;
+ }
+}
+
+defm : EFLAGSDefiningPats<"", NoNDD>;
+defm : EFLAGSDefiningPats<"_ND", HasNDD>;
// mul reg, imm
def : Pat<(mul GR16:$src1, imm:$src2),
@@ -2023,103 +2146,6 @@ def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-// Increment/Decrement reg.
-// Do not make INC/DEC if it is slow
-let Predicates = [UseIncDec] in {
- def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
- def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
- def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
- def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
- def : Pat<(add GR8:$src, -1), (DEC8r GR8:$src)>;
- def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>;
- def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>;
- def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-
- def : Pat<(X86add_flag_nocf GR8:$src, -1), (DEC8r GR8:$src)>;
- def : Pat<(X86add_flag_nocf GR16:$src, -1), (DEC16r GR16:$src)>;
- def : Pat<(X86add_flag_nocf GR32:$src, -1), (DEC32r GR32:$src)>;
- def : Pat<(X86add_flag_nocf GR64:$src, -1), (DEC64r GR64:$src)>;
- def : Pat<(X86sub_flag_nocf GR8:$src, -1), (INC8r GR8:$src)>;
- def : Pat<(X86sub_flag_nocf GR16:$src, -1), (INC16r GR16:$src)>;
- def : Pat<(X86sub_flag_nocf GR32:$src, -1), (INC32r GR32:$src)>;
- def : Pat<(X86sub_flag_nocf GR64:$src, -1), (INC64r GR64:$src)>;
-}
-
-// or reg/reg.
-def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(or GR64:$src1, GR64:$src2), (OR64rr GR64:$src1, GR64:$src2)>;
-
-// or reg/mem
-def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
- (OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
- (OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
- (OR32rm GR32:$src1, addr:$src2)>;
-def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
- (OR64rm GR64:$src1, addr:$src2)>;
-
-// or reg/imm
-def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
-def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(or GR64:$src1, i64immSExt32:$src2),
- (OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// xor reg/reg
-def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(xor GR64:$src1, GR64:$src2), (XOR64rr GR64:$src1, GR64:$src2)>;
-
-// xor reg/mem
-def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
- (XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
- (XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
- (XOR32rm GR32:$src1, addr:$src2)>;
-def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
- (XOR64rm GR64:$src1, addr:$src2)>;
-
-// xor reg/imm
-def : Pat<(xor GR8:$src1, imm:$src2),
- (XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(xor GR16:$src1, imm:$src2),
- (XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(xor GR32:$src1, imm:$src2),
- (XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
- (XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// and reg/reg
-def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
-def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
-def : Pat<(and GR64:$src1, GR64:$src2), (AND64rr GR64:$src1, GR64:$src2)>;
-
-// and reg/mem
-def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
- (AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
- (AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
- (AND32rm GR32:$src1, addr:$src2)>;
-def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
- (AND64rm GR64:$src1, addr:$src2)>;
-
-// and reg/imm
-def : Pat<(and GR8:$src1, imm:$src2),
- (AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(and GR16:$src1, imm:$src2),
- (AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(and GR32:$src1, imm:$src2),
- (AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(and GR64:$src1, i64immSExt32:$src2),
- (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
// Bit scan instruction patterns to match explicit zero-undef behavior.
def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
diff --git a/llvm/lib/Target/X86/X86InstrMisc.td b/llvm/lib/Target/X86/X86InstrMisc.td
index 97c625a64cfc..753cf62392a1 100644
--- a/llvm/lib/Target/X86/X86InstrMisc.td
+++ b/llvm/lib/Target/X86/X86InstrMisc.td
@@ -1523,28 +1523,28 @@ def MOVDIR64B64_EVEX : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$
// ENQCMD/S - Enqueue 64-byte command as user with 64-byte write atomicity
//
let SchedRW = [WriteStore], Defs = [EFLAGS] in {
- def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+ def ENQCMD16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR16:$dst, addr:$src))]>,
T8, XD, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
- def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+ def ENQCMD32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR32:$dst, addr:$src))]>,
T8, XD, AdSize32, Requires<[HasENQCMD]>;
- def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+ def ENQCMD64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
"enqcmd\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmd GR64:$dst, addr:$src))]>,
T8, XD, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
- def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem:$src),
+ def ENQCMDS16 : I<0xF8, MRMSrcMem, (outs), (ins GR16:$dst, i512mem_GR16:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR16:$dst, addr:$src))]>,
T8, XS, AdSize16, Requires<[HasENQCMD, Not64BitMode]>;
- def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem:$src),
+ def ENQCMDS32 : I<0xF8, MRMSrcMem, (outs), (ins GR32:$dst, i512mem_GR32:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR32:$dst, addr:$src))]>,
T8, XS, AdSize32, Requires<[HasENQCMD]>;
- def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem:$src),
+ def ENQCMDS64 : I<0xF8, MRMSrcMem, (outs), (ins GR64:$dst, i512mem_GR64:$src),
"enqcmds\t{$src, $dst|$dst, $src}",
[(set EFLAGS, (X86enqcmds GR64:$dst, addr:$src))]>,
T8, XS, AdSize64, Requires<[HasENQCMD, In64BitMode]>;
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index 49ef6efc6aec..48d689549709 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -18,6 +18,10 @@ def DefaultPfmCounters : ProcPfmCounters {}
def : PfmCountersDefaultBinding<DefaultPfmCounters>;
// Intel X86 Counters.
+defvar DefaultIntelPfmValidationCounters = [
+ PfmValidationCounter<InstructionRetired, "INSTRUCTIONS_RETIRED">
+];
+
def PentiumPfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
let UopsCounter = PfmCounter<"uops_retired">;
@@ -100,6 +104,7 @@ def SandyBridgePfmCounters : ProcPfmCounters {
PfmIssueCounter<"SBPort4", "uops_dispatched_port:port_4">,
PfmIssueCounter<"SBPort5", "uops_dispatched_port:port_5">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"sandybridge", SandyBridgePfmCounters>;
def : PfmCountersBinding<"ivybridge", SandyBridgePfmCounters>;
@@ -117,6 +122,7 @@ def HaswellPfmCounters : ProcPfmCounters {
PfmIssueCounter<"HWPort6", "uops_executed_port:port_6">,
PfmIssueCounter<"HWPort7", "uops_executed_port:port_7">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"haswell", HaswellPfmCounters>;
@@ -133,6 +139,7 @@ def BroadwellPfmCounters : ProcPfmCounters {
PfmIssueCounter<"BWPort6", "uops_executed_port:port_6">,
PfmIssueCounter<"BWPort7", "uops_executed_port:port_7">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"broadwell", BroadwellPfmCounters>;
@@ -149,6 +156,7 @@ def SkylakeClientPfmCounters : ProcPfmCounters {
PfmIssueCounter<"SKLPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"SKLPort7", "uops_dispatched_port:port_7">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"skylake", SkylakeClientPfmCounters>;
@@ -165,6 +173,7 @@ def SkylakeServerPfmCounters : ProcPfmCounters {
PfmIssueCounter<"SKXPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"SKXPort7", "uops_dispatched_port:port_7">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"skylake-avx512", SkylakeServerPfmCounters>;
def : PfmCountersBinding<"cascadelake", SkylakeServerPfmCounters>;
@@ -182,6 +191,7 @@ def IceLakePfmCounters : ProcPfmCounters {
PfmIssueCounter<"ICXPort6", "uops_dispatched_port:port_6">,
PfmIssueCounter<"ICXPort78", "uops_dispatched_port:port_7_8">
];
+ let ValidationCounters = DefaultIntelPfmValidationCounters;
}
def : PfmCountersBinding<"icelake-client", IceLakePfmCounters>;
def : PfmCountersBinding<"icelake-server", IceLakePfmCounters>;
@@ -189,6 +199,10 @@ def : PfmCountersBinding<"rocketlake", IceLakePfmCounters>;
def : PfmCountersBinding<"tigerlake", IceLakePfmCounters>;
// AMD X86 Counters.
+defvar DefaultAMDPfmValidationCounters = [
+ PfmValidationCounter<InstructionRetired, "RETIRED_INSTRUCTIONS">
+];
+
// Set basic counters for AMD cpus that we know libpfm4 supports.
def DefaultAMDPfmCounters : ProcPfmCounters {
let CycleCounter = PfmCounter<"cpu_clk_unhalted">;
@@ -265,6 +279,7 @@ def ZnVer1PfmCounters : ProcPfmCounters {
PfmIssueCounter<"ZnAGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
PfmIssueCounter<"ZnDivider", "div_op_count">
];
+ let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver1", ZnVer1PfmCounters>;
@@ -275,6 +290,7 @@ def ZnVer2PfmCounters : ProcPfmCounters {
PfmIssueCounter<"Zn2AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">,
PfmIssueCounter<"Zn2Divider", "div_op_count">
];
+ let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver2", ZnVer2PfmCounters>;
@@ -288,6 +304,7 @@ def ZnVer3PfmCounters : ProcPfmCounters {
PfmIssueCounter<"Zn3Store", "ls_dispatch:store_dispatch">,
PfmIssueCounter<"Zn3Divider", "div_op_count">
];
+ let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver3", ZnVer3PfmCounters>;
@@ -302,5 +319,6 @@ def ZnVer4PfmCounters : ProcPfmCounters {
PfmIssueCounter<"Zn4Divider", "div_op_count">,
PfmIssueCounter<"Zn4AGU", "ls_dispatch:ld_st_dispatch + ls_dispatch:ld_dispatch + ls_dispatch:store_dispatch">
];
+ let ValidationCounters = DefaultAMDPfmValidationCounters;
}
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
diff --git a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
index 6d3a59d532fd..45d04f9bcbfb 100644
--- a/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
+++ b/llvm/lib/TargetParser/ARMTargetParserCommon.cpp
@@ -140,13 +140,14 @@ ARM::EndianKind ARM::parseArchEndian(StringRef Arch) {
// an erroneous part of the spec.
bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
StringRef &Err) {
- PBP = {"none", "a_key", false, false};
+ PBP = {"none", "a_key", false, false, false};
if (Spec == "none")
return true; // defaults are ok
if (Spec == "standard") {
PBP.Scope = "non-leaf";
PBP.BranchTargetEnforcement = true;
+ PBP.GuardedControlStack = true;
return true;
}
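
With this hunk, the "standard" shorthand now enables the Guarded Control Stack alongside pac-ret scope and BTI, and the second hunk below adds a standalone "gcs" option. A hedged sketch of the resulting mapping (struct and function names here are illustrative, not the LLVM API):

```cpp
#include <string>

struct BranchProtectionBits { // loosely mirrors ParsedBranchProtection
  std::string Scope = "none";
  bool BTI = false;
  bool GCS = false;
};

static BranchProtectionBits parseSpec(const std::string &Spec) {
  if (Spec == "standard")
    return {"non-leaf", /*BTI=*/true, /*GCS=*/true}; // now implies gcs too
  BranchProtectionBits P;
  if (Spec == "bti")
    P.BTI = true;
  else if (Spec == "gcs") // new standalone option
    P.GCS = true;
  else if (Spec == "pac-ret")
    P.Scope = "non-leaf";
  return P;
}
```
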
@@ -173,6 +174,10 @@ bool ARM::parseBranchProtection(StringRef Spec, ParsedBranchProtection &PBP,
}
continue;
}
+ if (Opt == "gcs") {
+ PBP.GuardedControlStack = true;
+ continue;
+ }
if (Opt == "")
Err = "<empty>";
else
diff --git a/llvm/lib/TextAPI/InterfaceFile.cpp b/llvm/lib/TextAPI/InterfaceFile.cpp
index 3689ab919191..d712ed386825 100644
--- a/llvm/lib/TextAPI/InterfaceFile.cpp
+++ b/llvm/lib/TextAPI/InterfaceFile.cpp
@@ -24,17 +24,23 @@ void InterfaceFileRef::addTarget(const Target &Target) {
void InterfaceFile::addAllowableClient(StringRef InstallName,
const Target &Target) {
+ if (InstallName.empty())
+ return;
auto Client = addEntry(AllowableClients, InstallName);
Client->addTarget(Target);
}
void InterfaceFile::addReexportedLibrary(StringRef InstallName,
const Target &Target) {
+ if (InstallName.empty())
+ return;
auto Lib = addEntry(ReexportedLibraries, InstallName);
Lib->addTarget(Target);
}
void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) {
+ if (Parent.empty())
+ return;
auto Iter = lower_bound(ParentUmbrellas, Target_,
[](const std::pair<Target, std::string> &LHS,
Target RHS) { return LHS.first < RHS; });
@@ -48,6 +54,8 @@ void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) {
}
void InterfaceFile::addRPath(const Target &InputTarget, StringRef RPath) {
+ if (RPath.empty())
+ return;
using RPathEntryT = const std::pair<Target, std::string>;
RPathEntryT Entry(InputTarget, RPath);
auto Iter =
diff --git a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
index 70a3f3067d9d..0a6f69bc73d5 100644
--- a/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
+++ b/llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp
@@ -77,6 +77,16 @@ STATISTIC(MaxAllocVersionsThinBackend,
"allocation during ThinLTO backend");
STATISTIC(UnclonableAllocsThinBackend,
"Number of unclonable ambigous allocations during ThinLTO backend");
+STATISTIC(RemovedEdgesWithMismatchedCallees,
+ "Number of edges removed due to mismatched callees (profiled vs IR)");
+STATISTIC(FoundProfiledCalleeCount,
+ "Number of profiled callees found via tail calls");
+STATISTIC(FoundProfiledCalleeDepth,
+ "Aggregate depth of profiled callees found via tail calls");
+STATISTIC(FoundProfiledCalleeMaxDepth,
+ "Maximum depth of profiled callees found via tail calls");
+STATISTIC(FoundProfiledCalleeNonUniquelyCount,
+ "Number of profiled callees found via multiple tail call chains");
static cl::opt<std::string> DotFilePathPrefix(
"memprof-dot-file-path-prefix", cl::init(""), cl::Hidden,
@@ -104,6 +114,12 @@ static cl::opt<std::string> MemProfImportSummary(
cl::desc("Import summary to use for testing the ThinLTO backend via opt"),
cl::Hidden);
+static cl::opt<unsigned>
+ TailCallSearchDepth("memprof-tail-call-search-depth", cl::init(5),
+ cl::Hidden,
+ cl::desc("Max depth to recursively search for missing "
+ "frames through tail calls."));
+
namespace llvm {
// Indicate we are linking with an allocator that supports hot/cold operator
// new interfaces.
@@ -365,8 +381,7 @@ protected:
/// Save lists of calls with MemProf metadata in each function, for faster
/// iteration.
- std::vector<std::pair<FuncTy *, std::vector<CallInfo>>>
- FuncToCallsWithMetadata;
+ MapVector<FuncTy *, std::vector<CallInfo>> FuncToCallsWithMetadata;
/// Map from callsite node to the enclosing caller function.
std::map<const ContextNode *, const FuncTy *> NodeToCallingFunc;
@@ -411,9 +426,25 @@ private:
return static_cast<const DerivedCCG *>(this)->getStackId(IdOrIndex);
}
- /// Returns true if the given call targets the given function.
- bool calleeMatchesFunc(CallTy Call, const FuncTy *Func) {
- return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(Call, Func);
+ /// Returns true if the given call targets the callee of the given edge, or if
+ /// we were able to identify the call chain through intermediate tail calls.
+ /// In the latter case new context nodes are added to the graph for the
+ /// identified tail calls, and their synthesized nodes are added to
+ /// TailCallToContextNodeMap. The EdgeIter is updated in either case to the
+ /// next element after the input position (either incremented or updated after
+ /// removing the old edge).
+ bool
+ calleesMatch(CallTy Call, EdgeIter &EI,
+ MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap);
+
+ /// Returns true if the given call targets the given function, or if we were
+ /// able to identify the call chain through intermediate tail calls (in which
+ /// case FoundCalleeChain will be populated).
+ bool calleeMatchesFunc(
+ CallTy Call, const FuncTy *Func, const FuncTy *CallerFunc,
+ std::vector<std::pair<CallTy, FuncTy *>> &FoundCalleeChain) {
+ return static_cast<DerivedCCG *>(this)->calleeMatchesFunc(
+ Call, Func, CallerFunc, FoundCalleeChain);
}
/// Get a list of nodes corresponding to the stack ids in the given
@@ -553,7 +584,13 @@ private:
Instruction *>;
uint64_t getStackId(uint64_t IdOrIndex) const;
- bool calleeMatchesFunc(Instruction *Call, const Function *Func);
+ bool calleeMatchesFunc(
+ Instruction *Call, const Function *Func, const Function *CallerFunc,
+ std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain);
+ bool findProfiledCalleeThroughTailCalls(
+ const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
+ std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
+ bool &FoundMultipleCalleeChains);
uint64_t getLastStackId(Instruction *Call);
std::vector<uint64_t> getStackIdsWithContextNodesForCall(Instruction *Call);
void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
@@ -606,12 +643,31 @@ public:
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing);
+ ~IndexCallsiteContextGraph() {
+ // Now that we are done with the graph it is safe to add the new
+ // CallsiteInfo structs to the function summary vectors. The graph nodes
+ // point into locations within these vectors, so we don't want to add them
+ // any earlier.
+ for (auto &I : FunctionCalleesToSynthesizedCallsiteInfos) {
+ auto *FS = I.first;
+ for (auto &Callsite : I.second)
+ FS->addCallsite(*Callsite.second);
+ }
+ }
+
private:
friend CallsiteContextGraph<IndexCallsiteContextGraph, FunctionSummary,
IndexCall>;
uint64_t getStackId(uint64_t IdOrIndex) const;
- bool calleeMatchesFunc(IndexCall &Call, const FunctionSummary *Func);
+ bool calleeMatchesFunc(
+ IndexCall &Call, const FunctionSummary *Func,
+ const FunctionSummary *CallerFunc,
+ std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain);
+ bool findProfiledCalleeThroughTailCalls(
+ ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
+ std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
+ bool &FoundMultipleCalleeChains);
uint64_t getLastStackId(IndexCall &Call);
std::vector<uint64_t> getStackIdsWithContextNodesForCall(IndexCall &Call);
void updateAllocationCall(CallInfo &Call, AllocationType AllocType);
@@ -630,6 +686,16 @@ private:
std::map<const FunctionSummary *, ValueInfo> FSToVIMap;
const ModuleSummaryIndex &Index;
+ function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
+ isPrevailing;
+
+ // Saves/owns the callsite info structures synthesized for missing tail call
+ // frames that we discover while building the graph.
+ // It maps from the summary of the function making the tail call, to a map
+ // of callee ValueInfo to corresponding synthesized callsite info.
+ std::unordered_map<FunctionSummary *,
+ std::map<ValueInfo, std::unique_ptr<CallsiteInfo>>>
+ FunctionCalleesToSynthesizedCallsiteInfos;
};
} // namespace
@@ -1493,7 +1559,7 @@ ModuleCallsiteContextGraph::ModuleCallsiteContextGraph(
}
}
if (!CallsWithMetadata.empty())
- FuncToCallsWithMetadata.push_back({&F, CallsWithMetadata});
+ FuncToCallsWithMetadata[&F] = CallsWithMetadata;
}
if (DumpCCG) {
@@ -1518,7 +1584,7 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing)
- : Index(Index) {
+ : Index(Index), isPrevailing(isPrevailing) {
for (auto &I : Index) {
auto VI = Index.getValueInfo(I);
for (auto &S : VI.getSummaryList()) {
@@ -1572,7 +1638,7 @@ IndexCallsiteContextGraph::IndexCallsiteContextGraph(
CallsWithMetadata.push_back({&SN});
if (!CallsWithMetadata.empty())
- FuncToCallsWithMetadata.push_back({FS, CallsWithMetadata});
+ FuncToCallsWithMetadata[FS] = CallsWithMetadata;
if (!FS->allocs().empty() || !FS->callsites().empty())
FSToVIMap[FS] = VI;
@@ -1604,6 +1670,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy,
// this transformation for regular LTO, and for ThinLTO we can simulate that
// effect in the summary and perform the actual speculative devirtualization
// while cloning in the ThinLTO backend.
+
+ // Keep track of the new nodes synthesized for discovered tail calls missing
+ // from the profiled contexts.
+ MapVector<CallInfo, ContextNode *> TailCallToContextNodeMap;
+
for (auto Entry = NonAllocationCallToContextNodeMap.begin();
Entry != NonAllocationCallToContextNodeMap.end();) {
auto *Node = Entry->second;
@@ -1611,13 +1682,17 @@ void CallsiteContextGraph<DerivedCCG, FuncTy,
// Check all node callees and see if in the same function.
bool Removed = false;
auto Call = Node->Call.call();
- for (auto &Edge : Node->CalleeEdges) {
- if (!Edge->Callee->hasCall())
+ for (auto EI = Node->CalleeEdges.begin(); EI != Node->CalleeEdges.end();) {
+ auto Edge = *EI;
+ if (!Edge->Callee->hasCall()) {
+ ++EI;
continue;
+ }
assert(NodeToCallingFunc.count(Edge->Callee));
// Check if the called function matches that of the callee node.
- if (calleeMatchesFunc(Call, NodeToCallingFunc[Edge->Callee]))
+ if (calleesMatch(Call, EI, TailCallToContextNodeMap))
continue;
+ RemovedEdgesWithMismatchedCallees++;
// Work around by setting Node to have a null call, so it gets
// skipped during cloning. Otherwise assignFunctions will assert
// because its data structures are not designed to handle this case.
@@ -1629,6 +1704,11 @@ void CallsiteContextGraph<DerivedCCG, FuncTy,
if (!Removed)
Entry++;
}
+
+ // Add the new nodes after the above loop so that the iteration is not
+ // invalidated.
+ for (auto &[Call, Node] : TailCallToContextNodeMap)
+ NonAllocationCallToContextNodeMap[Call] = Node;
}
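
The calleesMatch path used in the loop above ultimately relies on a depth-limited search (findProfiledCalleeThroughTailCalls, whose body begins below and is truncated in this excerpt) that walks candidate tail-call chains from the IR callee toward the profiled callee. A simplified, hypothetical sketch of the idea; the real code also detects multiple chains and synthesizes CallsiteInfo records:

```cpp
#include <vector>

struct Fn {                      // hypothetical stand-in for a function
  std::vector<Fn *> TailCallees; // functions it tail-calls
};

// Returns true if Profiled is reachable from Cur through at most MaxDepth
// tail calls; Chain is filled callee-first on the way back up, matching
// the "in order of callee to caller" comment earlier in this diff.
static bool findThroughTailCalls(const Fn *Profiled, const Fn *Cur,
                                 unsigned Depth,
                                 std::vector<const Fn *> &Chain,
                                 unsigned MaxDepth = 5) {
  if (Depth > MaxDepth)
    return false;
  if (Cur == Profiled)
    return true;
  for (const Fn *Next : Cur->TailCallees)
    if (findThroughTailCalls(Profiled, Next, Depth + 1, Chain, MaxDepth)) {
      Chain.push_back(Next);
      return true;
    }
  return false;
}
```
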
uint64_t ModuleCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
@@ -1642,8 +1722,173 @@ uint64_t IndexCallsiteContextGraph::getStackId(uint64_t IdOrIndex) const {
return Index.getStackIdAtIndex(IdOrIndex);
}
-bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
- const Function *Func) {
+template <typename DerivedCCG, typename FuncTy, typename CallTy>
+bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::calleesMatch(
+ CallTy Call, EdgeIter &EI,
+ MapVector<CallInfo, ContextNode *> &TailCallToContextNodeMap) {
+ auto Edge = *EI;
+ const FuncTy *ProfiledCalleeFunc = NodeToCallingFunc[Edge->Callee];
+ const FuncTy *CallerFunc = NodeToCallingFunc[Edge->Caller];
+ // Will be populated in order of callee to caller if we find a chain of tail
+ // calls between the profiled caller and callee.
+ std::vector<std::pair<CallTy, FuncTy *>> FoundCalleeChain;
+ if (!calleeMatchesFunc(Call, ProfiledCalleeFunc, CallerFunc,
+ FoundCalleeChain)) {
+ ++EI;
+ return false;
+ }
+
+ // The usual case where the profiled callee matches that of the IR/summary.
+ if (FoundCalleeChain.empty()) {
+ ++EI;
+ return true;
+ }
+
+ auto AddEdge = [Edge, &EI](ContextNode *Caller, ContextNode *Callee) {
+ auto *CurEdge = Callee->findEdgeFromCaller(Caller);
+ // If there is already an edge between these nodes, simply update it and
+ // return.
+ if (CurEdge) {
+ CurEdge->ContextIds.insert(Edge->ContextIds.begin(),
+ Edge->ContextIds.end());
+ CurEdge->AllocTypes |= Edge->AllocTypes;
+ return;
+ }
+ // Otherwise, create a new edge and insert it into the caller and callee
+ // lists.
+ auto NewEdge = std::make_shared<ContextEdge>(
+ Callee, Caller, Edge->AllocTypes, Edge->ContextIds);
+ Callee->CallerEdges.push_back(NewEdge);
+ if (Caller == Edge->Caller) {
+ // If we are inserting the new edge into the current edge's caller, insert
+ // the new edge before the current iterator position, and then increment
+ // back to the current edge.
+ EI = Caller->CalleeEdges.insert(EI, NewEdge);
+ ++EI;
+ assert(*EI == Edge &&
+ "Iterator position not restored after insert and increment");
+ } else
+ Caller->CalleeEdges.push_back(NewEdge);
+ };
+
+ // Create new nodes for each found callee and connect in between the profiled
+ // caller and callee.
+ auto *CurCalleeNode = Edge->Callee;
+ for (auto &[NewCall, Func] : FoundCalleeChain) {
+ ContextNode *NewNode = nullptr;
+ // First check if we have already synthesized a node for this tail call.
+ if (TailCallToContextNodeMap.count(NewCall)) {
+ NewNode = TailCallToContextNodeMap[NewCall];
+ NewNode->ContextIds.insert(Edge->ContextIds.begin(),
+ Edge->ContextIds.end());
+ NewNode->AllocTypes |= Edge->AllocTypes;
+ } else {
+ FuncToCallsWithMetadata[Func].push_back({NewCall});
+ // Create Node and record node info.
+ NodeOwner.push_back(
+ std::make_unique<ContextNode>(/*IsAllocation=*/false, NewCall));
+ NewNode = NodeOwner.back().get();
+ NodeToCallingFunc[NewNode] = Func;
+ TailCallToContextNodeMap[NewCall] = NewNode;
+ NewNode->ContextIds = Edge->ContextIds;
+ NewNode->AllocTypes = Edge->AllocTypes;
+ }
+
+ // Hook up node to its callee node
+ AddEdge(NewNode, CurCalleeNode);
+
+ CurCalleeNode = NewNode;
+ }
+
+ // Hook up edge's original caller to new callee node.
+ AddEdge(Edge->Caller, CurCalleeNode);
+
+ // Remove old edge
+ Edge->Callee->eraseCallerEdge(Edge.get());
+ EI = Edge->Caller->CalleeEdges.erase(EI);
+
+ return true;
+}
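The AddEdge lambda above relies on a small iterator dance when it inserts into the container currently being walked; a minimal standalone sketch of that idiom (std::vector here is an assumption, the real edge list type may differ):

#include <cassert>
#include <memory>
#include <vector>

using Edge = std::shared_ptr<int>;
using EdgeIter = std::vector<Edge>::iterator;

// Insert NewEdge immediately before the element EI points at, then step EI
// back onto that original element, mirroring the insert/increment/assert
// sequence in AddEdge.
void insertBeforeCurrent(std::vector<Edge> &Edges, EdgeIter &EI, Edge NewEdge) {
  Edge Cur = *EI;                            // the edge EI referred to
  EI = Edges.insert(EI, std::move(NewEdge)); // insert() returns a valid iterator
  ++EI;                                      // back onto the original edge
  assert(*EI == Cur && "Iterator position not restored");
}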
+
+bool ModuleCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
+ const Function *ProfiledCallee, Value *CurCallee, unsigned Depth,
+ std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain,
+ bool &FoundMultipleCalleeChains) {
+ // Stop recursive search if we have already explored the maximum specified
+ // depth.
+ if (Depth > TailCallSearchDepth)
+ return false;
+
+ auto SaveCallsiteInfo = [&](Instruction *Callsite, Function *F) {
+ FoundCalleeChain.push_back({Callsite, F});
+ };
+
+ auto *CalleeFunc = dyn_cast<Function>(CurCallee);
+ if (!CalleeFunc) {
+ auto *Alias = dyn_cast<GlobalAlias>(CurCallee);
+ assert(Alias);
+ CalleeFunc = dyn_cast<Function>(Alias->getAliasee());
+ assert(CalleeFunc);
+ }
+
+ // Look for tail calls in this function, and check if they either call the
+ // profiled callee directly, or indirectly (via a recursive search).
+ // Only succeed if there is a single unique tail call chain found between the
+ // profiled caller and callee, otherwise we could perform incorrect cloning.
+ bool FoundSingleCalleeChain = false;
+ for (auto &BB : *CalleeFunc) {
+ for (auto &I : BB) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB || !CB->isTailCall())
+ continue;
+ auto *CalledValue = CB->getCalledOperand();
+ auto *CalledFunction = CB->getCalledFunction();
+ if (CalledValue && !CalledFunction) {
+ CalledValue = CalledValue->stripPointerCasts();
+ // Stripping pointer casts can reveal a called function.
+ CalledFunction = dyn_cast<Function>(CalledValue);
+ }
+ // Check if this is an alias to a function. If so, get the
+ // called aliasee for the checks below.
+ if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
+ assert(!CalledFunction &&
+ "Expected null called function in callsite for alias");
+ CalledFunction = dyn_cast<Function>(GA->getAliaseeObject());
+ }
+ if (!CalledFunction)
+ continue;
+ if (CalledFunction == ProfiledCallee) {
+ if (FoundSingleCalleeChain) {
+ FoundMultipleCalleeChains = true;
+ return false;
+ }
+ FoundSingleCalleeChain = true;
+ FoundProfiledCalleeCount++;
+ FoundProfiledCalleeDepth += Depth;
+ if (Depth > FoundProfiledCalleeMaxDepth)
+ FoundProfiledCalleeMaxDepth = Depth;
+ SaveCallsiteInfo(&I, CalleeFunc);
+ } else if (findProfiledCalleeThroughTailCalls(
+ ProfiledCallee, CalledFunction, Depth + 1,
+ FoundCalleeChain, FoundMultipleCalleeChains)) {
+ if (FoundMultipleCalleeChains)
+ return false;
+ if (FoundSingleCalleeChain) {
+ FoundMultipleCalleeChains = true;
+ return false;
+ }
+ FoundSingleCalleeChain = true;
+ SaveCallsiteInfo(&I, CalleeFunc);
+ }
+ }
+ }
+
+ return FoundSingleCalleeChain;
+}
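The same search, reduced to its shape on a toy call graph (a sketch under the assumption that only the edge structure matters): a depth-limited DFS that succeeds only when exactly one tail-call path reaches the target, recording the chain callee-first.

#include <map>
#include <vector>

static bool findUniqueChain(int Cur, int Target, unsigned Depth,
                            const std::map<int, std::vector<int>> &TailCalls,
                            std::vector<int> &Chain, bool &FoundMultiple,
                            unsigned MaxDepth = 5) {
  if (Depth > MaxDepth)
    return false;
  auto It = TailCalls.find(Cur);
  if (It == TailCalls.end())
    return false;
  bool FoundOne = false;
  for (int Callee : It->second) {
    bool Hit = Callee == Target ||
               findUniqueChain(Callee, Target, Depth + 1, TailCalls, Chain,
                               FoundMultiple, MaxDepth);
    if (FoundMultiple)
      return false;
    if (!Hit)
      continue;
    if (FoundOne) {          // a second distinct path: cloning would be unsafe
      FoundMultiple = true;
      return false;
    }
    FoundOne = true;
    Chain.push_back(Callee); // recorded callee-to-caller, as above
  }
  return FoundOne;
}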
+
+bool ModuleCallsiteContextGraph::calleeMatchesFunc(
+ Instruction *Call, const Function *Func, const Function *CallerFunc,
+ std::vector<std::pair<Instruction *, Function *>> &FoundCalleeChain) {
auto *CB = dyn_cast<CallBase>(Call);
if (!CB->getCalledOperand())
return false;
@@ -1652,11 +1897,117 @@ bool ModuleCallsiteContextGraph::calleeMatchesFunc(Instruction *Call,
if (CalleeFunc == Func)
return true;
auto *Alias = dyn_cast<GlobalAlias>(CalleeVal);
- return Alias && Alias->getAliasee() == Func;
+ if (Alias && Alias->getAliasee() == Func)
+ return true;
+
+ // Recursively search for the profiled callee through tail calls starting with
+ // the actual Callee. The discovered tail call chain is saved in
+ // FoundCalleeChain, and we will fixup the graph to include these callsites
+ // after returning.
+ // FIXME: We will currently redo the same recursive walk if we find the same
+ // mismatched callee from another callsite. We can improve this with more
+ // bookkeeping of the created chain of new nodes for each mismatch.
+ unsigned Depth = 1;
+ bool FoundMultipleCalleeChains = false;
+ if (!findProfiledCalleeThroughTailCalls(Func, CalleeVal, Depth,
+ FoundCalleeChain,
+ FoundMultipleCalleeChains)) {
+ LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: "
+ << Func->getName() << " from " << CallerFunc->getName()
+ << " that actually called " << CalleeVal->getName()
+ << (FoundMultipleCalleeChains
+ ? " (found multiple possible chains)"
+ : "")
+ << "\n");
+ if (FoundMultipleCalleeChains)
+ FoundProfiledCalleeNonUniquelyCount++;
+ return false;
+ }
+
+ return true;
}
-bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
- const FunctionSummary *Func) {
+bool IndexCallsiteContextGraph::findProfiledCalleeThroughTailCalls(
+ ValueInfo ProfiledCallee, ValueInfo CurCallee, unsigned Depth,
+ std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain,
+ bool &FoundMultipleCalleeChains) {
+ // Stop recursive search if we have already explored the maximum specified
+ // depth.
+ if (Depth > TailCallSearchDepth)
+ return false;
+
+ auto CreateAndSaveCallsiteInfo = [&](ValueInfo Callee, FunctionSummary *FS) {
+ // Make a CallsiteInfo for each discovered callee, if one hasn't already
+ // been synthesized.
+ if (!FunctionCalleesToSynthesizedCallsiteInfos.count(FS) ||
+ !FunctionCalleesToSynthesizedCallsiteInfos[FS].count(Callee))
+ // StackIds is empty (we don't have debug info available in the index for
+ // these callsites)
+ FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee] =
+ std::make_unique<CallsiteInfo>(Callee, SmallVector<unsigned>());
+ CallsiteInfo *NewCallsiteInfo =
+ FunctionCalleesToSynthesizedCallsiteInfos[FS][Callee].get();
+ FoundCalleeChain.push_back({NewCallsiteInfo, FS});
+ };
+
+ // Look for tail calls in this function, and check if they either call the
+ // profiled callee directly, or indirectly (via a recursive search).
+ // Only succeed if there is a single unique tail call chain found between the
+ // profiled caller and callee, otherwise we could perform incorrect cloning.
+ bool FoundSingleCalleeChain = false;
+ for (auto &S : CurCallee.getSummaryList()) {
+ if (!GlobalValue::isLocalLinkage(S->linkage()) &&
+ !isPrevailing(CurCallee.getGUID(), S.get()))
+ continue;
+ auto *FS = dyn_cast<FunctionSummary>(S->getBaseObject());
+ if (!FS)
+ continue;
+ auto FSVI = CurCallee;
+ auto *AS = dyn_cast<AliasSummary>(S.get());
+ if (AS)
+ FSVI = AS->getAliaseeVI();
+ for (auto &CallEdge : FS->calls()) {
+ if (!CallEdge.second.hasTailCall())
+ continue;
+ if (CallEdge.first == ProfiledCallee) {
+ if (FoundSingleCalleeChain) {
+ FoundMultipleCalleeChains = true;
+ return false;
+ }
+ FoundSingleCalleeChain = true;
+ FoundProfiledCalleeCount++;
+ FoundProfiledCalleeDepth += Depth;
+ if (Depth > FoundProfiledCalleeMaxDepth)
+ FoundProfiledCalleeMaxDepth = Depth;
+ CreateAndSaveCallsiteInfo(CallEdge.first, FS);
+ // Add FS to FSToVIMap in case it isn't already there.
+ assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
+ FSToVIMap[FS] = FSVI;
+ } else if (findProfiledCalleeThroughTailCalls(
+ ProfiledCallee, CallEdge.first, Depth + 1,
+ FoundCalleeChain, FoundMultipleCalleeChains)) {
+ if (FoundMultipleCalleeChains)
+ return false;
+ if (FoundSingleCalleeChain) {
+ FoundMultipleCalleeChains = true;
+ return false;
+ }
+ FoundSingleCalleeChain = true;
+ CreateAndSaveCallsiteInfo(CallEdge.first, FS);
+ // Add FS to FSToVIMap in case it isn't already there.
+ assert(!FSToVIMap.count(FS) || FSToVIMap[FS] == FSVI);
+ FSToVIMap[FS] = FSVI;
+ }
+ }
+ }
+
+ return FoundSingleCalleeChain;
+}
+
+bool IndexCallsiteContextGraph::calleeMatchesFunc(
+ IndexCall &Call, const FunctionSummary *Func,
+ const FunctionSummary *CallerFunc,
+ std::vector<std::pair<IndexCall, FunctionSummary *>> &FoundCalleeChain) {
ValueInfo Callee =
dyn_cast_if_present<CallsiteInfo *>(Call.getBase())->Callee;
// If there is no summary list then this is a call to an externally defined
@@ -1666,11 +2017,38 @@ bool IndexCallsiteContextGraph::calleeMatchesFunc(IndexCall &Call,
? nullptr
: dyn_cast<AliasSummary>(Callee.getSummaryList()[0].get());
assert(FSToVIMap.count(Func));
- return Callee == FSToVIMap[Func] ||
- // If callee is an alias, check the aliasee, since only function
- // summary base objects will contain the stack node summaries and thus
- // get a context node.
- (Alias && Alias->getAliaseeVI() == FSToVIMap[Func]);
+ auto FuncVI = FSToVIMap[Func];
+ if (Callee == FuncVI ||
+ // If callee is an alias, check the aliasee, since only function
+ // summary base objects will contain the stack node summaries and thus
+ // get a context node.
+ (Alias && Alias->getAliaseeVI() == FuncVI))
+ return true;
+
+ // Recursively search for the profiled callee through tail calls starting with
+ // the actual Callee. The discovered tail call chain is saved in
+ // FoundCalleeChain, and we will fixup the graph to include these callsites
+ // after returning.
+ // FIXME: We will currently redo the same recursive walk if we find the same
+ // mismatched callee from another callsite. We can improve this with more
+ // bookkeeping of the created chain of new nodes for each mismatch.
+ unsigned Depth = 1;
+ bool FoundMultipleCalleeChains = false;
+ if (!findProfiledCalleeThroughTailCalls(
+ FuncVI, Callee, Depth, FoundCalleeChain, FoundMultipleCalleeChains)) {
+ LLVM_DEBUG(dbgs() << "Not found through unique tail call chain: " << FuncVI
+ << " from " << FSToVIMap[CallerFunc]
+ << " that actually called " << Callee
+ << (FoundMultipleCalleeChains
+ ? " (found multiple possible chains)"
+ : "")
+ << "\n");
+ if (FoundMultipleCalleeChains)
+ FoundProfiledCalleeNonUniquelyCount++;
+ return false;
+ }
+
+ return true;
}
static std::string getAllocTypeString(uint8_t AllocTypes) {
@@ -2533,6 +2911,9 @@ bool CallsiteContextGraph<DerivedCCG, FuncTy, CallTy>::assignFunctions() {
// that were previously assigned to call PreviousAssignedFuncClone,
// to record that they now call NewFuncClone.
for (auto CE : Clone->CallerEdges) {
+ // Skip any that have been removed on an earlier iteration.
+ if (!CE)
+ continue;
// Ignore any caller that does not have a recorded callsite Call.
if (!CE->Caller->hasCall())
continue;
@@ -2945,6 +3326,42 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
NumClonesCreated = NumClones;
};
+ auto CloneCallsite = [&](const CallsiteInfo &StackNode, CallBase *CB,
+ Function *CalledFunction) {
+ // Perform cloning if not yet done.
+ CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
+
+ // Should have skipped indirect calls via mayHaveMemprofSummary.
+ assert(CalledFunction);
+ assert(!IsMemProfClone(*CalledFunction));
+
+ // Update the calls per the summary info.
+ // Save orig name since it gets updated in the first iteration
+ // below.
+ auto CalleeOrigName = CalledFunction->getName();
+ for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
+ // Do nothing if this version calls the original version of its
+ // callee.
+ if (!StackNode.Clones[J])
+ continue;
+ auto NewF = M.getOrInsertFunction(
+ getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
+ CalledFunction->getFunctionType());
+ CallBase *CBClone;
+ // Copy 0 is the original function.
+ if (!J)
+ CBClone = CB;
+ else
+ CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
+ CBClone->setCalledFunction(NewF);
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
+ << ore::NV("Call", CBClone) << " in clone "
+ << ore::NV("Caller", CBClone->getFunction())
+ << " assigned to call function clone "
+ << ore::NV("Callee", NewF.getCallee()));
+ }
+ };
+
// Locate the summary for F.
ValueInfo TheFnVI = findValueInfoForFunc(F, M, ImportSummary);
// If not found, this could be an imported local (see comment in
@@ -2974,6 +3391,23 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
auto SI = FS->callsites().begin();
auto AI = FS->allocs().begin();
+ // To handle callsite infos synthesized for tail calls which have missing
+ // frames in the profiled context, map callee VI to the synthesized callsite
+ // info.
+ DenseMap<ValueInfo, CallsiteInfo> MapTailCallCalleeVIToCallsite;
+ // Iterate the callsites for this function in reverse, since we place all
+ // those synthesized for tail calls at the end.
+ for (auto CallsiteIt = FS->callsites().rbegin();
+ CallsiteIt != FS->callsites().rend(); CallsiteIt++) {
+ auto &Callsite = *CallsiteIt;
+ // Stop as soon as we see a non-synthesized callsite info (see comment
+ // above loop). All the entries added for discovered tail calls have empty
+ // stack ids.
+ if (!Callsite.StackIdIndices.empty())
+ break;
+ MapTailCallCalleeVIToCallsite.insert({Callsite.Callee, Callsite});
+ }
+
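Since the synthesized records are appended after all real ones and are the only entries with empty stack ids, a reverse scan that stops at the first non-empty entry visits exactly the synthesized tail. A reduced sketch (hypothetical record type):

#include <vector>

struct CallsiteRec {
  std::vector<unsigned> StackIdIndices;
  int CalleeId;
};

template <typename Fn>
void forEachSynthesized(const std::vector<CallsiteRec> &Callsites, Fn Visit) {
  for (auto It = Callsites.rbegin(); It != Callsites.rend(); ++It) {
    if (!It->StackIdIndices.empty())
      break;          // first real callsite; all earlier entries are real too
    Visit(*It);
  }
}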
// Assume for now that the instructions are in the exact same order
// as when the summary was created, but confirm this is correct by
// matching the stack ids.
@@ -3126,37 +3560,16 @@ bool MemProfContextDisambiguation::applyImport(Module &M) {
}
#endif
- // Perform cloning if not yet done.
- CloneFuncIfNeeded(/*NumClones=*/StackNode.Clones.size());
-
- // Should have skipped indirect calls via mayHaveMemprofSummary.
- assert(CalledFunction);
- assert(!IsMemProfClone(*CalledFunction));
-
- // Update the calls per the summary info.
- // Save orig name since it gets updated in the first iteration
- // below.
- auto CalleeOrigName = CalledFunction->getName();
- for (unsigned J = 0; J < StackNode.Clones.size(); J++) {
- // Do nothing if this version calls the original version of its
- // callee.
- if (!StackNode.Clones[J])
- continue;
- auto NewF = M.getOrInsertFunction(
- getMemProfFuncName(CalleeOrigName, StackNode.Clones[J]),
- CalledFunction->getFunctionType());
- CallBase *CBClone;
- // Copy 0 is the original function.
- if (!J)
- CBClone = CB;
- else
- CBClone = cast<CallBase>((*VMaps[J - 1])[CB]);
- CBClone->setCalledFunction(NewF);
- ORE.emit(OptimizationRemark(DEBUG_TYPE, "MemprofCall", CBClone)
- << ore::NV("Call", CBClone) << " in clone "
- << ore::NV("Caller", CBClone->getFunction())
- << " assigned to call function clone "
- << ore::NV("Callee", NewF.getCallee()));
+ CloneCallsite(StackNode, CB, CalledFunction);
+ } else if (CB->isTailCall()) {
+ // Locate the synthesized callsite info for the callee VI, if any was
+ // created, and use that for cloning.
+ ValueInfo CalleeVI =
+ findValueInfoForFunc(*CalledFunction, M, ImportSummary);
+ if (CalleeVI && MapTailCallCalleeVIToCallsite.count(CalleeVI)) {
+ auto Callsite = MapTailCallCalleeVIToCallsite.find(CalleeVI);
+ assert(Callsite != MapTailCallCalleeVIToCallsite.end());
+ CloneCallsite(Callsite->second, CB, CalledFunction);
}
}
// Memprof and callsite metadata on memory allocations no longer needed.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 96b612254ca5..c7e6f32c5406 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1723,6 +1723,30 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
I, Builder.CreateIntrinsic(Intrinsic::ctpop, {I.getType()},
{Builder.CreateOr(A, B)}));
+ // Fold the log2_ceil idiom:
+ // zext(ctpop(A) >u/!= 1) + (ctlz(A, true) ^ (BW - 1))
+ // -->
+ // BW - ctlz(A - 1, false)
+ const APInt *XorC;
+ if (match(&I,
+ m_c_Add(
+ m_ZExt(m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(A)),
+ m_One())),
+ m_OneUse(m_ZExtOrSelf(m_OneUse(m_Xor(
+ m_OneUse(m_TruncOrSelf(m_OneUse(
+ m_Intrinsic<Intrinsic::ctlz>(m_Deferred(A), m_One())))),
+ m_APInt(XorC))))))) &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_NE) &&
+ *XorC == A->getType()->getScalarSizeInBits() - 1) {
+ Value *Sub = Builder.CreateAdd(A, Constant::getAllOnesValue(A->getType()));
+ Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {A->getType()},
+ {Sub, Builder.getFalse()});
+ Value *Ret = Builder.CreateSub(
+ ConstantInt::get(A->getType(), A->getType()->getScalarSizeInBits()),
+ Ctlz, "", /*HasNUW*/ true, /*HasNSW*/ true);
+ return replaceInstUsesWith(I, Builder.CreateZExtOrTrunc(Ret, I.getType()));
+ }
+
if (Instruction *Res = foldSquareSumInt(I))
return Res;
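A quick numeric check of the identity behind this fold, using compiler builtins as stand-ins for the intrinsics (an assumption; __builtin_clz(0) is undefined, so zero is guarded). Both sides compute ceil(log2(X)) for X >= 1 at 32-bit width.

#include <cstdint>

static unsigned log2CeilBefore(uint32_t X) { // zext(ctpop != 1) + (ctlz ^ 31)
  const unsigned BW = 32;
  unsigned Ctpop = (unsigned)__builtin_popcount(X);
  unsigned Ctlz = X ? (unsigned)__builtin_clz(X) : BW;
  return (unsigned)(Ctpop != 1) + (Ctlz ^ (BW - 1));
}

static unsigned log2CeilAfter(uint32_t X) {  // BW - ctlz(X - 1, /*ZeroUndef=*/false)
  const unsigned BW = 32;
  uint32_t M = X - 1;
  unsigned Ctlz = M ? (unsigned)__builtin_clz(M) : BW;
  return BW - Ctlz;
}

// e.g. X = 5: before = 1 + (29 ^ 31) = 3; after = 32 - clz(4) = 32 - 29 = 3.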
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c03f50d75814..0620752e3213 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -46,44 +46,6 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
return Builder.CreateFCmp(NewPred, LHS, RHS);
}
-/// Transform BITWISE_OP(BSWAP(A),BSWAP(B)) or
-/// BITWISE_OP(BSWAP(A), Constant) to BSWAP(BITWISE_OP(A, B))
-/// \param I Binary operator to transform.
-/// \return Pointer to node that must replace the original binary operator, or
-/// null pointer if no transformation was made.
-static Value *SimplifyBSwap(BinaryOperator &I,
- InstCombiner::BuilderTy &Builder) {
- assert(I.isBitwiseLogicOp() && "Unexpected opcode for bswap simplifying");
-
- Value *OldLHS = I.getOperand(0);
- Value *OldRHS = I.getOperand(1);
-
- Value *NewLHS;
- if (!match(OldLHS, m_BSwap(m_Value(NewLHS))))
- return nullptr;
-
- Value *NewRHS;
- const APInt *C;
-
- if (match(OldRHS, m_BSwap(m_Value(NewRHS)))) {
- // OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
- if (!OldLHS->hasOneUse() && !OldRHS->hasOneUse())
- return nullptr;
- // NewRHS initialized by the matcher.
- } else if (match(OldRHS, m_APInt(C))) {
- // OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
- if (!OldLHS->hasOneUse())
- return nullptr;
- NewRHS = ConstantInt::get(I.getType(), C->byteSwap());
- } else
- return nullptr;
-
- Value *BinOp = Builder.CreateBinOp(I.getOpcode(), NewLHS, NewRHS);
- Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap,
- I.getType());
- return Builder.CreateCall(F, BinOp);
-}
-
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
/// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
/// whether to treat V, Lo, and Hi as signed or not.
@@ -2159,6 +2121,64 @@ Instruction *InstCombinerImpl::foldBinOpOfDisplacedShifts(BinaryOperator &I) {
return BinaryOperator::Create(ShiftOp, NewC, ShAmt);
}
+// Fold and/or/xor with two equal intrinsic IDs:
+// bitwise(fshl (A, B, ShAmt), fshl(C, D, ShAmt))
+// -> fshl(bitwise(A, C), bitwise(B, D), ShAmt)
+// bitwise(fshr (A, B, ShAmt), fshr(C, D, ShAmt))
+// -> fshr(bitwise(A, C), bitwise(B, D), ShAmt)
+// bitwise(bswap(A), bswap(B)) -> bswap(bitwise(A, B))
+// bitwise(bswap(A), C) -> bswap(bitwise(A, bswap(C)))
+// bitwise(bitreverse(A), bitreverse(B)) -> bitreverse(bitwise(A, B))
+// bitwise(bitreverse(A), C) -> bitreverse(bitwise(A, bitreverse(C)))
+static Instruction *
+foldBitwiseLogicWithIntrinsics(BinaryOperator &I,
+ InstCombiner::BuilderTy &Builder) {
+ assert(I.isBitwiseLogicOp() && "Should be and/or/xor");
+ if (!I.getOperand(0)->hasOneUse())
+ return nullptr;
+ IntrinsicInst *X = dyn_cast<IntrinsicInst>(I.getOperand(0));
+ if (!X)
+ return nullptr;
+
+ IntrinsicInst *Y = dyn_cast<IntrinsicInst>(I.getOperand(1));
+ if (Y && (!Y->hasOneUse() || X->getIntrinsicID() != Y->getIntrinsicID()))
+ return nullptr;
+
+ Intrinsic::ID IID = X->getIntrinsicID();
+ const APInt *RHSC;
+ // Try to match constant RHS.
+ if (!Y && (!(IID == Intrinsic::bswap || IID == Intrinsic::bitreverse) ||
+ !match(I.getOperand(1), m_APInt(RHSC))))
+ return nullptr;
+
+ switch (IID) {
+ case Intrinsic::fshl:
+ case Intrinsic::fshr: {
+ if (X->getOperand(2) != Y->getOperand(2))
+ return nullptr;
+ Value *NewOp0 =
+ Builder.CreateBinOp(I.getOpcode(), X->getOperand(0), Y->getOperand(0));
+ Value *NewOp1 =
+ Builder.CreateBinOp(I.getOpcode(), X->getOperand(1), Y->getOperand(1));
+ Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
+ return CallInst::Create(F, {NewOp0, NewOp1, X->getOperand(2)});
+ }
+ case Intrinsic::bswap:
+ case Intrinsic::bitreverse: {
+ Value *NewOp0 = Builder.CreateBinOp(
+ I.getOpcode(), X->getOperand(0),
+ Y ? Y->getOperand(0)
+ : ConstantInt::get(I.getType(), IID == Intrinsic::bswap
+ ? RHSC->byteSwap()
+ : RHSC->reverseBits()));
+ Function *F = Intrinsic::getDeclaration(I.getModule(), IID, I.getType());
+ return CallInst::Create(F, {NewOp0});
+ }
+ default:
+ return nullptr;
+ }
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
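A spot check of two of the identities listed above, with portable helpers standing in for the intrinsics (an assumption): bswap is a byte permutation and so distributes over any bitwise op, and rotates with a shared amount (the fshl(A, A, S) special case) distribute the same way.

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t V) {
  return (V << 24) | ((V & 0xFF00u) << 8) | ((V >> 8) & 0xFF00u) | (V >> 24);
}

static uint32_t rotl32(uint32_t V, unsigned S) { // fshl(V, V, S) on i32
  S &= 31;
  return S ? (V << S) | (V >> (32 - S)) : V;
}

int main() {
  uint32_t A = 0x12345678u, B = 0x9ABCDEF0u;
  assert((bswap32(A) | bswap32(B)) == bswap32(A | B));
  assert((rotl32(A, 7) ^ rotl32(B, 7)) == rotl32(A ^ B, 7));
  return 0;
}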
@@ -2194,9 +2214,6 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Value *V = foldUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyBSwap(I, Builder))
- return replaceInstUsesWith(I, V);
-
if (Instruction *R = foldBinOpShiftWithShift(I))
return R;
@@ -2688,6 +2705,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
return Res;
+ if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
+ return Res;
+
return nullptr;
}
@@ -3347,9 +3367,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *V = foldUsingDistributiveLaws(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyBSwap(I, Builder))
- return replaceInstUsesWith(I, V);
-
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
Type *Ty = I.getType();
if (Ty->isIntOrIntVectorTy(1)) {
@@ -3884,6 +3901,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
}
+ if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
+ return Res;
+
return nullptr;
}
@@ -4507,9 +4527,6 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (Value *V = SimplifyBSwap(I, Builder))
- return replaceInstUsesWith(I, V);
-
if (Instruction *R = foldNot(I))
return R;
@@ -4799,5 +4816,8 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *Res = foldBinOpOfDisplacedShifts(I))
return Res;
+ if (Instruction *Res = foldBitwiseLogicWithIntrinsics(I, Builder))
+ return Res;
+
return nullptr;
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 40b48699f758..64fbd5543a9e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1884,6 +1884,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return crossLogicOpFold;
}
+ // Try to fold into bitreverse if bswap is the root of the expression tree.
+ if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
+ /*MatchBitReversals*/ true))
+ return BitOp;
break;
}
case Intrinsic::masked_load:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index ab55f235920a..21bfc91148bf 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1704,11 +1704,11 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
if (CmpRHS != CmpLHS && isa<Constant>(CmpRHS) && !isa<Constant>(CmpLHS)) {
if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) {
// Transform (X == C) ? X : Y -> (X == C) ? C : Y
- SI.setOperand(1, CmpRHS);
+ replaceOperand(SI, 1, CmpRHS);
Changed = true;
} else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) {
// Transform (X != C) ? Y : X -> (X != C) ? Y : C
- SI.setOperand(2, CmpRHS);
+ replaceOperand(SI, 2, CmpRHS);
Changed = true;
}
}
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index e3deafa49bd9..5e7e08eaa997 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -216,7 +216,7 @@ static cl::opt<bool> ClInstrumentWrites(
cl::Hidden, cl::init(true));
static cl::opt<bool>
- ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(false),
+ ClUseStackSafety("asan-use-stack-safety", cl::Hidden, cl::init(true),
cl::Hidden, cl::desc("Use Stack Safety analysis results"),
cl::Optional);
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 6b95c7028d93..c20fc942eaf0 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -617,9 +617,7 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
std::vector<uint8_t> Indexes;
JamCRC JC;
for (auto &BB : F) {
- const Instruction *TI = BB.getTerminator();
- for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
- BasicBlock *Succ = TI->getSuccessor(I);
+ for (BasicBlock *Succ : successors(&BB)) {
auto BI = findBBInfo(Succ);
if (BI == nullptr)
continue;
@@ -658,10 +656,10 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
<< " CRC = " << JC.getCRC()
<< ", Selects = " << SIVisitor.getNumOfSelectInsts()
<< ", Edges = " << MST.numEdges() << ", ICSites = "
- << ValueSites[IPVK_IndirectCallTarget].size());
- LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
- << ", High32 CRC = " << JCH.getCRC());
- LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";);
+ << ValueSites[IPVK_IndirectCallTarget].size()
+ << ", Memops = " << ValueSites[IPVK_MemOPSize].size()
+ << ", High32 CRC = " << JCH.getCRC()
+ << ", Hash = " << FunctionHash << "\n";);
if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index eef94636578d..533cefaf1061 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -207,6 +207,12 @@ struct FlattenInfo {
match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(OuterInductionPHI)),
m_Value(MatchedItCount)));
+ // Matches the pattern ptr+i*M+j, with the two additions being done via GEP.
+ bool IsGEP = match(U, m_GEP(m_GEP(m_Value(), m_Value(MatchedMul)),
+ m_Specific(InnerInductionPHI))) &&
+ match(MatchedMul, m_c_Mul(m_Specific(OuterInductionPHI),
+ m_Value(MatchedItCount)));
+
if (!MatchedItCount)
return false;
@@ -224,7 +230,7 @@ struct FlattenInfo {
// Look through extends if the IV has been widened. Don't look through
// extends if we already looked through a trunc.
- if (Widened && IsAdd &&
+ if (Widened && (IsAdd || IsGEP) &&
(isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
assert(MatchedItCount->getType() == InnerInductionPHI->getType() &&
"Unexpected type mismatch in types after widening");
@@ -236,7 +242,7 @@ struct FlattenInfo {
LLVM_DEBUG(dbgs() << "Looking for inner trip count: ";
InnerTripCount->dump());
- if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
+ if ((IsAdd || IsAddTrunc || IsGEP) && MatchedItCount == InnerTripCount) {
LLVM_DEBUG(dbgs() << "Found. This sse is optimisable\n");
ValidOuterPHIUses.insert(MatchedMul);
LinearIVUses.insert(U);
@@ -646,33 +652,40 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
if (OR != OverflowResult::MayOverflow)
return OR;
- for (Value *V : FI.LinearIVUses) {
- for (Value *U : V->users()) {
- if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
- for (Value *GEPUser : U->users()) {
- auto *GEPUserInst = cast<Instruction>(GEPUser);
- if (!isa<LoadInst>(GEPUserInst) &&
- !(isa<StoreInst>(GEPUserInst) &&
- GEP == GEPUserInst->getOperand(1)))
- continue;
- if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst,
- FI.InnerLoop))
- continue;
- // The IV is used as the operand of a GEP which dominates the loop
- // latch, and the IV is at least as wide as the address space of the
- // GEP. In this case, the GEP would wrap around the address space
- // before the IV increment wraps, which would be UB.
- if (GEP->isInBounds() &&
- V->getType()->getIntegerBitWidth() >=
- DL.getPointerTypeSizeInBits(GEP->getType())) {
- LLVM_DEBUG(
- dbgs() << "use of linear IV would be UB if overflow occurred: ";
- GEP->dump());
- return OverflowResult::NeverOverflows;
- }
- }
+ auto CheckGEP = [&](GetElementPtrInst *GEP, Value *GEPOperand) {
+ for (Value *GEPUser : GEP->users()) {
+ auto *GEPUserInst = cast<Instruction>(GEPUser);
+ if (!isa<LoadInst>(GEPUserInst) &&
+ !(isa<StoreInst>(GEPUserInst) && GEP == GEPUserInst->getOperand(1)))
+ continue;
+ if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst, FI.InnerLoop))
+ continue;
+ // The IV is used as the operand of a GEP which dominates the loop
+ // latch, and the IV is at least as wide as the address space of the
+ // GEP. In this case, the GEP would wrap around the address space
+ // before the IV increment wraps, which would be UB.
+ if (GEP->isInBounds() &&
+ GEPOperand->getType()->getIntegerBitWidth() >=
+ DL.getPointerTypeSizeInBits(GEP->getType())) {
+ LLVM_DEBUG(
+ dbgs() << "use of linear IV would be UB if overflow occurred: ";
+ GEP->dump());
+ return true;
}
}
+ return false;
+ };
+
+ // Check if any IV user is, or is used by, a GEP that would cause UB if the
+ // multiply overflows.
+ for (Value *V : FI.LinearIVUses) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(V))
+ if (GEP->getNumIndices() == 1 && CheckGEP(GEP, GEP->getOperand(1)))
+ return OverflowResult::NeverOverflows;
+ for (Value *U : V->users())
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+ if (CheckGEP(GEP, V))
+ return OverflowResult::NeverOverflows;
}
return OverflowResult::MayOverflow;
@@ -778,6 +791,18 @@ static bool DoFlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
OuterValue = Builder.CreateTrunc(FI.OuterInductionPHI, V->getType(),
"flatten.trunciv");
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ // Replace the GEP with one that uses OuterValue as the offset.
+ auto *InnerGEP = cast<GetElementPtrInst>(GEP->getOperand(0));
+ Value *Base = InnerGEP->getOperand(0);
+ // If the base of the GEP doesn't dominate the outer induction phi, we need
+ // to insert the new GEP where the old GEP was.
+ if (!DT->dominates(Base, &*Builder.GetInsertPoint()))
+ Builder.SetInsertPoint(cast<Instruction>(V));
+ OuterValue = Builder.CreateGEP(GEP->getSourceElementType(), Base,
+ OuterValue, "flatten." + V->getName());
+ }
+
LLVM_DEBUG(dbgs() << "Replacing: "; V->dump(); dbgs() << "with: ";
OuterValue->dump());
V->replaceAllUsesWith(OuterValue);
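In source terms, the newly matched shape and the result of flattening look roughly like this (a hypothetical C++ mirror of the IR, not code from the pass): the nested form indexes through two additions, i.e. gep(gep(Q, I*M), J), and the flattened form runs a single induction variable.

#include <cstddef>

void nested(int *Q, size_t N, size_t M) {
  for (size_t I = 0; I < N; ++I)
    for (size_t J = 0; J < M; ++J)
      *(&Q[I * M] + J) = 0; // ptr + i*M + j via two GEPs
}

void flattened(int *Q, size_t N, size_t M) {
  for (size_t K = 0; K < N * M; ++K)
    Q[K] = 0;               // GEP rebased onto the single flattened IV
}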
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 3f02441b74ba..b98f823ab00b 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -1975,19 +1975,10 @@ insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
assert(AllocaMap.count(OriginalValue));
Value *Alloca = AllocaMap[OriginalValue];
- // Emit store into the related alloca
- // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
- // the correct type according to alloca.
+ // Emit store into the related alloca.
assert(Relocate->getNextNode() &&
"Should always have one since it's not a terminator");
- IRBuilder<> Builder(Relocate->getNextNode());
- Value *CastedRelocatedValue =
- Builder.CreateBitCast(Relocate,
- cast<AllocaInst>(Alloca)->getAllocatedType(),
- suffixed_name_or(Relocate, ".casted", ""));
-
- new StoreInst(CastedRelocatedValue, Alloca,
- cast<Instruction>(CastedRelocatedValue)->getNextNode());
+ new StoreInst(Relocate, Alloca, Relocate->getNextNode());
#ifndef NDEBUG
VisitedLiveValues.insert(OriginalValue);
@@ -2620,13 +2611,9 @@ static bool inlineGetBaseAndOffset(Function &F,
Value *Base =
findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
assert(!DVCache.count(Callsite));
- auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast(
- Base, Callsite->getType(), suffixed_name_or(Base, ".cast", ""));
- if (BaseBC != Base)
- DVCache[BaseBC] = Base;
- Callsite->replaceAllUsesWith(BaseBC);
- if (!BaseBC->hasName())
- BaseBC->takeName(Callsite);
+ Callsite->replaceAllUsesWith(Base);
+ if (!Base->hasName())
+ Base->takeName(Callsite);
Callsite->eraseFromParent();
break;
}
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 225dd454068c..d2fed11445e4 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1093,67 +1093,25 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// => add the offset
//
// %gep2 ; clone of %gep
- // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
+ // %new.gep = gep i8, %gep2, %offset
// %gep ; will be removed
// ... %gep ...
//
// => replace all uses of %gep with %new.gep and remove %gep
//
// %gep2 ; clone of %gep
- // %new.gep = gep %gep2, <offset / sizeof(*%gep)>
- // ... %new.gep ...
- //
- // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
- // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
- // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
- // type of %gep.
- //
- // %gep2 ; clone of %gep
- // %0 = bitcast %gep2 to i8*
- // %uglygep = gep %0, <offset>
- // %new.gep = bitcast %uglygep to <type of %gep>
+ // %new.gep = gep i8, %gep2, %offset
// ... %new.gep ...
Instruction *NewGEP = GEP->clone();
NewGEP->insertBefore(GEP);
- // Per ANSI C standard, signed / unsigned = unsigned and signed % unsigned =
- // unsigned.. Therefore, we cast ElementTypeSizeOfGEP to signed because it is
- // used with unsigned integers later.
- int64_t ElementTypeSizeOfGEP = static_cast<int64_t>(
- DL->getTypeAllocSize(GEP->getResultElementType()));
Type *PtrIdxTy = DL->getIndexType(GEP->getType());
- if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
- // Very likely. As long as %gep is naturally aligned, the byte offset we
- // extracted should be a multiple of sizeof(*%gep).
- int64_t Index = AccumulativeByteOffset / ElementTypeSizeOfGEP;
- NewGEP = GetElementPtrInst::Create(GEP->getResultElementType(), NewGEP,
- ConstantInt::get(PtrIdxTy, Index, true),
- GEP->getName(), GEP);
- NewGEP->copyMetadata(*GEP);
- // Inherit the inbounds attribute of the original GEP.
- cast<GetElementPtrInst>(NewGEP)->setIsInBounds(GEPWasInBounds);
- } else {
- // Unlikely but possible. For example,
- // #pragma pack(1)
- // struct S {
- // int a[3];
- // int64 b[8];
- // };
- // #pragma pack()
- //
- // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
- // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
- // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
- // sizeof(int64).
- //
- // Emit an uglygep in this case.
- IRBuilder<> Builder(GEP);
- NewGEP = cast<Instruction>(Builder.CreateGEP(
- Builder.getInt8Ty(), NewGEP,
- {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)}, "uglygep",
- GEPWasInBounds));
- NewGEP->copyMetadata(*GEP);
- }
+ IRBuilder<> Builder(GEP);
+ NewGEP = cast<Instruction>(Builder.CreateGEP(
+ Builder.getInt8Ty(), NewGEP,
+ {ConstantInt::get(PtrIdxTy, AccumulativeByteOffset, true)},
+ GEP->getName(), GEPWasInBounds));
+ NewGEP->copyMetadata(*GEP);
GEP->replaceAllUsesWith(NewGEP);
GEP->eraseFromParent();
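At the pointer level the unconditional i8 GEP is just a raw byte bump, so the old divisibility check against the element size (and the bitcast-based uglygep fallback) has nothing left to do. A standalone sketch of the equivalent operation:

#include <cstdint>

// gep i8, %p, Bytes: reinterpret as bytes, offset, reinterpret back.
template <typename T>
T *addByteOffset(T *P, int64_t Bytes) {
  return reinterpret_cast<T *>(reinterpret_cast<char *>(P) + Bytes);
}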
diff --git a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index ca1f3a0c0ae3..2cce6eb22341 100644
--- a/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -233,13 +233,9 @@ private:
void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize,
GetElementPtrInst *GEP);
- // Emit code that computes the "bump" from Basis to C. If the candidate is a
- // GEP and the bump is not divisible by the element size of the GEP, this
- // function sets the BumpWithUglyGEP flag to notify its caller to bump the
- // basis using an ugly GEP.
+ // Emit code that computes the "bump" from Basis to C.
static Value *emitBump(const Candidate &Basis, const Candidate &C,
- IRBuilder<> &Builder, const DataLayout *DL,
- bool &BumpWithUglyGEP);
+ IRBuilder<> &Builder, const DataLayout *DL);
const DataLayout *DL = nullptr;
DominatorTree *DT = nullptr;
@@ -581,26 +577,11 @@ static void unifyBitWidth(APInt &A, APInt &B) {
Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis,
const Candidate &C,
IRBuilder<> &Builder,
- const DataLayout *DL,
- bool &BumpWithUglyGEP) {
+ const DataLayout *DL) {
APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue();
unifyBitWidth(Idx, BasisIdx);
APInt IndexOffset = Idx - BasisIdx;
- BumpWithUglyGEP = false;
- if (Basis.CandidateKind == Candidate::GEP) {
- APInt ElementSize(
- IndexOffset.getBitWidth(),
- DL->getTypeAllocSize(
- cast<GetElementPtrInst>(Basis.Ins)->getResultElementType()));
- APInt Q, R;
- APInt::sdivrem(IndexOffset, ElementSize, Q, R);
- if (R == 0)
- IndexOffset = Q;
- else
- BumpWithUglyGEP = true;
- }
-
// Compute Bump = C - Basis = (i' - i) * S.
// Common case 1: if (i' - i) is 1, Bump = S.
if (IndexOffset == 1)
@@ -645,8 +626,7 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
return;
IRBuilder<> Builder(C.Ins);
- bool BumpWithUglyGEP;
- Value *Bump = emitBump(Basis, C, Builder, DL, BumpWithUglyGEP);
+ Value *Bump = emitBump(Basis, C, Builder, DL);
Value *Reduced = nullptr; // equivalent to but weaker than C.Ins
switch (C.CandidateKind) {
case Candidate::Add:
@@ -673,28 +653,13 @@ void StraightLineStrengthReduce::rewriteCandidateWithBasis(
}
break;
}
- case Candidate::GEP:
- {
- Type *OffsetTy = DL->getIndexType(C.Ins->getType());
+ case Candidate::GEP: {
bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds();
- if (BumpWithUglyGEP) {
- // C = (char *)Basis + Bump
- unsigned AS = Basis.Ins->getType()->getPointerAddressSpace();
- Type *CharTy = PointerType::get(Basis.Ins->getContext(), AS);
- Reduced = Builder.CreateBitCast(Basis.Ins, CharTy);
- Reduced =
- Builder.CreateGEP(Builder.getInt8Ty(), Reduced, Bump, "", InBounds);
- Reduced = Builder.CreateBitCast(Reduced, C.Ins->getType());
- } else {
- // C = gep Basis, Bump
- // Canonicalize bump to pointer size.
- Bump = Builder.CreateSExtOrTrunc(Bump, OffsetTy);
- Reduced = Builder.CreateGEP(
- cast<GetElementPtrInst>(Basis.Ins)->getResultElementType(), Basis.Ins,
- Bump, "", InBounds);
- }
- break;
- }
+ // C = (char *)Basis + Bump
+ Reduced =
+ Builder.CreateGEP(Builder.getInt8Ty(), Basis.Ins, Bump, "", InBounds);
+ break;
+ }
default:
llvm_unreachable("C.CandidateKind is invalid");
};
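A worked example of the simplification (illustrative numbers): if the bump (i' - i) * S works out to 36 bytes and the GEP's element type is 16 bytes wide, 36 is not a whole number of elements, which previously forced the bitcast-heavy ugly-GEP path; now every candidate is rewritten uniformly as gep i8, %Basis, 36, so the divisibility check and the BumpWithUglyGEP flag disappear.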
diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index c76cc9db16d7..b9cad764aaef 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3905,7 +3905,8 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
SmallVectorImpl<Instruction *> &InsertedInsts) {
if (!match(I, m_Or(m_Value(), m_Value())) &&
!match(I, m_FShl(m_Value(), m_Value(), m_Value())) &&
- !match(I, m_FShr(m_Value(), m_Value(), m_Value())))
+ !match(I, m_FShr(m_Value(), m_Value(), m_Value())) &&
+ !match(I, m_BSwap(m_Value())))
return false;
if (!MatchBSwaps && !MatchBitReversals)
return false;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 61d891d65346..7515e539e7fb 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -6919,18 +6919,17 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
Builder.SetInsertPoint(SI);
- auto *ShiftC = ConstantInt::get(Ty, Shift);
- auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
- auto *LShr = Builder.CreateLShr(Sub, ShiftC);
- auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
- auto *Rot = Builder.CreateOr(LShr, Shl);
+ Value *Sub =
+ Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
+ Value *Rot = Builder.CreateIntrinsic(
+ Ty, Intrinsic::fshl,
+ {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
SI->replaceUsesOfWith(SI->getCondition(), Rot);
for (auto Case : SI->cases()) {
auto *Orig = Case.getCaseValue();
auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
- Case.setValue(
- cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
+ Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
}
return true;
}
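A standalone check of the rotate trick (helper written out by hand, an assumption): fshl(Sub, Sub, BW - Shift) is a left-rotate by BW - Shift, i.e. a right-rotate by Shift, so for case values that share Shift low zero bits it agrees with the old lshr, while remaining a bijection so non-case conditions cannot collide into a case.

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t V, unsigned S) {
  S &= 31;
  return S ? (V << S) | (V >> (32 - S)) : V;
}

int main() {
  const uint32_t Base = 0x40; // smallest case value
  const unsigned Shift = 4;   // all cases step by 16
  for (uint32_t Case : {0x40u, 0x50u, 0x60u}) {
    uint32_t Sub = Case - Base;
    uint32_t Mapped = rotl32(Sub, 32 - Shift); // fshl(Sub, Sub, 32 - Shift)
    assert(Mapped == Sub >> Shift);            // maps to 0, 1, 2
  }
  return 0;
}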
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 51ce88480c08..9743fa0e7402 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5004,9 +5004,8 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor(
VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost);
#ifndef NDEBUG
- unsigned AssumedMinimumVscale = 1;
- if (std::optional<unsigned> VScale = getVScaleForTuning(OrigLoop, TTI))
- AssumedMinimumVscale = *VScale;
+ unsigned AssumedMinimumVscale =
+ getVScaleForTuning(OrigLoop, TTI).value_or(1);
unsigned Width =
Candidate.Width.isScalable()
? Candidate.Width.getKnownMinValue() * AssumedMinimumVscale
@@ -8031,6 +8030,7 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
VPValue *EdgeMask = createEdgeMask(Predecessor, BB, Plan);
if (!EdgeMask) { // Mask of predecessor is all-one so mask of block is too.
BlockMaskCache[BB] = EdgeMask;
+ return;
}
if (!BlockMask) { // BlockMask still has its initial nullptr value.
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8e22b54f002d..055fbb00871f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6894,6 +6894,31 @@ protected:
};
} // namespace
+/// Returns the cost of the shuffle instructions with the given \p Kind, vector
+/// type \p Tp and optional \p Mask. Adds SLP-specific cost estimation for insert
+/// subvector pattern.
+static InstructionCost
+getShuffleCost(const TargetTransformInfo &TTI, TTI::ShuffleKind Kind,
+ VectorType *Tp, ArrayRef<int> Mask = std::nullopt,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ int Index = 0, VectorType *SubTp = nullptr,
+ ArrayRef<const Value *> Args = std::nullopt) {
+ if (Kind != TTI::SK_PermuteTwoSrc)
+ return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
+ int NumSrcElts = Tp->getElementCount().getKnownMinValue();
+ int NumSubElts;
+ if (Mask.size() > 2 && ShuffleVectorInst::isInsertSubvectorMask(
+ Mask, NumSrcElts, NumSubElts, Index)) {
+ if (Index + NumSubElts > NumSrcElts &&
+ Index + NumSrcElts <= static_cast<int>(Mask.size()))
+ return TTI.getShuffleCost(
+ TTI::SK_InsertSubvector,
+ FixedVectorType::get(Tp->getElementType(), Mask.size()), std::nullopt,
+ TTI::TCK_RecipThroughput, Index, Tp);
+ }
+ return TTI.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
+}
+
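The mask shape being special-cased looks like this (values illustrative): identity from the first source except for one contiguous run taken from the start of the second source, e.g. with 8 source elements the mask {0, 1, 2, 3, 8, 9, 10, 11} inserts a 4-element subvector at index 4. A sketch of the recognition, not the LLVM helper itself:

#include <vector>

static bool looksLikeInsertSubvector(const std::vector<int> &Mask,
                                     int NumSrcElts, int Index, int NumSub) {
  for (int I = 0, E = (int)Mask.size(); I != E; ++I) {
    bool InRun = I >= Index && I < Index + NumSub;
    int Expected = InRun ? NumSrcElts + (I - Index) : I; // run reads source two
    if (Mask[I] != Expected)
      return false;
  }
  return true;
}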
/// Merges shuffle masks and emits final shuffle instruction, if required. It
/// supports shuffling of 2 input vectors. It implements lazy shuffles emission,
/// when the actual shuffle instruction is generated only if this is actually
@@ -7141,15 +7166,15 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
std::optional<TTI::ShuffleKind> RegShuffleKind =
CheckPerRegistersShuffle(SubMask);
if (!RegShuffleKind) {
- Cost += TTI.getShuffleCost(
- *ShuffleKinds[Part],
+ Cost += ::getShuffleCost(
+ TTI, *ShuffleKinds[Part],
FixedVectorType::get(VL.front()->getType(), NumElts), MaskSlice);
continue;
}
if (*RegShuffleKind != TTI::SK_PermuteSingleSrc ||
!ShuffleVectorInst::isIdentityMask(SubMask, EltsPerVector)) {
- Cost += TTI.getShuffleCost(
- *RegShuffleKind,
+ Cost += ::getShuffleCost(
+ TTI, *RegShuffleKind,
FixedVectorType::get(VL.front()->getType(), EltsPerVector),
SubMask);
}
@@ -7222,8 +7247,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
cast<VectorType>(V1->getType())->getElementCount().getKnownMinValue();
if (isEmptyOrIdentity(Mask, VF))
return TTI::TCC_Free;
- return TTI.getShuffleCost(TTI::SK_PermuteTwoSrc,
- cast<VectorType>(V1->getType()), Mask);
+ return ::getShuffleCost(TTI, TTI::SK_PermuteTwoSrc,
+ cast<VectorType>(V1->getType()), Mask);
}
InstructionCost createShuffleVector(Value *V1, ArrayRef<int> Mask) const {
// Empty mask or identity mask are free.
@@ -8101,7 +8126,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
for (unsigned I = OffsetEnd + 1 - Offset; I < VecSz; ++I)
Mask[I] =
((I >= InMask.size()) || InMask.test(I)) ? PoisonMaskElem : I;
- Cost += TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
+ Cost +=
+ ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, InsertVecTy, Mask);
}
}
return Cost;
@@ -8428,8 +8454,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
return I->getOpcode() == E->getAltOpcode();
},
Mask);
- VecCost += TTIRef.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
- FinalVecTy, Mask);
+ VecCost += ::getShuffleCost(TTIRef, TargetTransformInfo::SK_PermuteTwoSrc,
+ FinalVecTy, Mask);
// Patterns like [fadd,fsub] can be combined into a single instruction
// in x86. Reordering them into [fsub,fadd] blocks this pattern. So we
// need to take into account their order when looking for the most used
@@ -9133,7 +9159,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
auto *FTy =
FixedVectorType::get(TEs.back()->Scalars.front()->getType(), VF);
InstructionCost C =
- TTI->getShuffleCost(TTI::SK_PermuteTwoSrc, FTy, Mask);
+ ::getShuffleCost(*TTI, TTI::SK_PermuteTwoSrc, FTy, Mask);
LLVM_DEBUG(dbgs() << "SLP: Adding cost " << C
<< " for final shuffle of vector node and external "
"insertelement users.\n";
@@ -11991,8 +12017,12 @@ Value *BoUpSLP::vectorizeTree(
IRBuilder<>::InsertPointGuard Guard(Builder);
if (auto *IVec = dyn_cast<Instruction>(Vec))
Builder.SetInsertPoint(IVec->getNextNonDebugInstruction());
- Vec = Builder.CreateIntCast(Vec, VU->getType(),
- BWIt->second.second);
+ Vec = Builder.CreateIntCast(
+ Vec,
+ FixedVectorType::get(
+ cast<VectorType>(VU->getType())->getElementType(),
+ cast<FixedVectorType>(Vec->getType())->getNumElements()),
+ BWIt->second.second);
VectorCasts.try_emplace(Scalar, Vec);
} else {
Vec = VecIt->second;
@@ -13070,10 +13100,14 @@ bool BoUpSLP::collectValuesToDemote(
if (isa<Constant>(V))
return true;
- // If the value is not a vectorized instruction in the expression with only
- // one use, it cannot be demoted.
+ // If the value is not a vectorized instruction in the expression and not used
+ // by the insertelement instruction and not used in multiple vector nodes, it
+ // cannot be demoted.
auto *I = dyn_cast<Instruction>(V);
- if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert(I).second)
+ if (!I || !getTreeEntry(I) || MultiNodeScalars.contains(I) ||
+ !Visited.insert(I).second || all_of(I->users(), [&](User *U) {
+ return isa<InsertElementInst>(U) && !getTreeEntry(U);
+ }))
return false;
unsigned Start = 0;
@@ -13144,11 +13178,6 @@ bool BoUpSLP::collectValuesToDemote(
}
void BoUpSLP::computeMinimumValueSizes() {
- // If there are no external uses, the expression tree must be rooted by a
- // store. We can't demote in-memory values, so there is nothing to do here.
- if (ExternalUses.empty())
- return;
-
// We only attempt to truncate integer expressions.
auto &TreeRoot = VectorizableTree[0]->Scalars;
auto *TreeRootIT = dyn_cast<IntegerType>(TreeRoot[0]->getType());
diff --git a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index e481f7e38e6a..f88e25ea1d16 100644
--- a/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -1368,7 +1368,7 @@ std::string TreePredicateFn::getCodeToRunOnSDNode() const {
if (immCodeUsesAPFloat())
Result += "cast<ConstantFPSDNode>(Node)->getValueAPF();\n";
else if (immCodeUsesAPInt())
- Result += "cast<ConstantSDNode>(Node)->getAPIntValue();\n";
+ Result += "Node->getAsAPIntVal();\n";
else
Result += "cast<ConstantSDNode>(Node)->getSExtValue();\n";
return Result + ImmCode;
diff --git a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
index 6fd5698e7372..a3e2facf948e 100644
--- a/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
+++ b/llvm/utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -60,10 +60,8 @@ class MatcherTableEmitter {
// all the patterns with "identical" predicates.
StringMap<TinyPtrVector<TreePattern *>> NodePredicatesByCodeToRun;
- StringMap<unsigned> PatternPredicateMap;
std::vector<std::string> PatternPredicates;
- DenseMap<const ComplexPattern*, unsigned> ComplexPatternMap;
std::vector<const ComplexPattern*> ComplexPatterns;
@@ -84,8 +82,50 @@ class MatcherTableEmitter {
}
public:
- MatcherTableEmitter(const CodeGenDAGPatterns &cgp)
- : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) {}
+ MatcherTableEmitter(const Matcher *TheMatcher, const CodeGenDAGPatterns &cgp)
+ : CGP(cgp), OpcodeCounts(Matcher::HighestKind + 1, 0) {
+ // Record the usage of ComplexPattern.
+ DenseMap<const ComplexPattern *, unsigned> ComplexPatternUsage;
+ // Record the usage of PatternPredicate.
+ std::map<StringRef, unsigned> PatternPredicateUsage;
+
+ // Iterate the whole MatcherTable once and do some statistics.
+ std::function<void(const Matcher *)> Statistic = [&](const Matcher *N) {
+ while (N) {
+ if (auto *SM = dyn_cast<ScopeMatcher>(N))
+ for (unsigned I = 0; I < SM->getNumChildren(); I++)
+ Statistic(SM->getChild(I));
+ else if (auto *SOM = dyn_cast<SwitchOpcodeMatcher>(N))
+ for (unsigned I = 0; I < SOM->getNumCases(); I++)
+ Statistic(SOM->getCaseMatcher(I));
+ else if (auto *STM = dyn_cast<SwitchTypeMatcher>(N))
+ for (unsigned I = 0; I < STM->getNumCases(); I++)
+ Statistic(STM->getCaseMatcher(I));
+ else if (auto *CPM = dyn_cast<CheckComplexPatMatcher>(N))
+ ++ComplexPatternUsage[&CPM->getPattern()];
+ else if (auto *CPPM = dyn_cast<CheckPatternPredicateMatcher>(N))
+ ++PatternPredicateUsage[CPPM->getPredicate()];
+ N = N->getNext();
+ }
+ };
+ Statistic(TheMatcher);
+
+ // Sort ComplexPatterns by usage.
+ std::vector<std::pair<const ComplexPattern *, unsigned>> ComplexPatternList(
+ ComplexPatternUsage.begin(), ComplexPatternUsage.end());
+ sort(ComplexPatternList,
+ [](const auto &A, const auto &B) { return A.second > B.second; });
+ for (const auto &ComplexPattern : ComplexPatternList)
+ ComplexPatterns.push_back(ComplexPattern.first);
+
+ // Sort PatternPredicates by usage.
+ std::vector<std::pair<std::string, unsigned>> PatternPredicateList(
+ PatternPredicateUsage.begin(), PatternPredicateUsage.end());
+ sort(PatternPredicateList,
+ [](const auto &A, const auto &B) { return A.second > B.second; });
+ for (const auto &PatternPredicate : PatternPredicateList)
+ PatternPredicates.push_back(PatternPredicate.first);
+ }
unsigned EmitMatcherList(const Matcher *N, const unsigned Indent,
unsigned StartIdx, raw_ostream &OS);
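The effect of the constructor's statistics pass is simply frequency-ranked index assignment: count every use up front, then hand out indices in descending order of use so the hottest predicates and complex patterns land in indices 0-7, where the compact one-byte opcodes below apply. A reduced sketch (container choices are an assumption):

#include <algorithm>
#include <map>
#include <string>
#include <utility>
#include <vector>

std::vector<std::string>
assignByFrequency(const std::map<std::string, unsigned> &Usage) {
  std::vector<std::pair<std::string, unsigned>> List(Usage.begin(), Usage.end());
  std::sort(List.begin(), List.end(),
            [](const auto &A, const auto &B) { return A.second > B.second; });
  std::vector<std::string> Table;
  for (const auto &Entry : List)
    Table.push_back(Entry.first); // index in Table = position found by lookup
  return Table;
}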
@@ -138,20 +178,10 @@ private:
}
unsigned getPatternPredicate(StringRef PredName) {
- unsigned &Entry = PatternPredicateMap[PredName];
- if (Entry == 0) {
- PatternPredicates.push_back(PredName.str());
- Entry = PatternPredicates.size();
- }
- return Entry-1;
+ return llvm::find(PatternPredicates, PredName) - PatternPredicates.begin();
}
unsigned getComplexPat(const ComplexPattern &P) {
- unsigned &Entry = ComplexPatternMap[&P];
- if (Entry == 0) {
- ComplexPatterns.push_back(&P);
- Entry = ComplexPatterns.size();
- }
- return Entry-1;
+ return llvm::find(ComplexPatterns, &P) - ComplexPatterns.begin();
}
unsigned getNodeXFormID(Record *Rec) {
@@ -486,13 +516,15 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
StringRef Pred = cast<CheckPatternPredicateMatcher>(N)->getPredicate();
unsigned PredNo = getPatternPredicate(Pred);
if (PredNo > 255)
- OS << "OPC_CheckPatternPredicate2, TARGET_VAL(" << PredNo << "),";
+ OS << "OPC_CheckPatternPredicateTwoByte, TARGET_VAL(" << PredNo << "),";
+ else if (PredNo < 8)
+ OS << "OPC_CheckPatternPredicate" << PredNo << ',';
else
OS << "OPC_CheckPatternPredicate, " << PredNo << ',';
if (!OmitComments)
OS << " // " << Pred;
OS << '\n';
- return 2 + (PredNo > 255);
+ return 2 + (PredNo > 255) - (PredNo < 8);
}
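For reference, the sizes returned here work out as: an index below 8 folds into the opcode itself (1 byte), indices 8-255 take the opcode plus one index byte (2 bytes), and anything larger takes the two-byte-index form (3 bytes), which is exactly 2 + (PredNo > 255) - (PredNo < 8).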
case Matcher::CheckPredicate: {
TreePredicateFn Pred = cast<CheckPredicateMatcher>(N)->getPredicate();
@@ -652,8 +684,13 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
case Matcher::CheckComplexPat: {
const CheckComplexPatMatcher *CCPM = cast<CheckComplexPatMatcher>(N);
const ComplexPattern &Pattern = CCPM->getPattern();
- OS << "OPC_CheckComplexPat, /*CP*/" << getComplexPat(Pattern) << ", /*#*/"
- << CCPM->getMatchNumber() << ',';
+ unsigned PatternNo = getComplexPat(Pattern);
+ if (PatternNo < 8)
+ OS << "OPC_CheckComplexPat" << PatternNo << ", /*#*/"
+ << CCPM->getMatchNumber() << ',';
+ else
+ OS << "OPC_CheckComplexPat, /*CP*/" << PatternNo << ", /*#*/"
+ << CCPM->getMatchNumber() << ',';
if (!OmitComments) {
OS << " // " << Pattern.getSelectFunc();
@@ -665,7 +702,7 @@ EmitMatcher(const Matcher *N, const unsigned Indent, unsigned CurrentIdx,
OS << " + chain result";
}
OS << '\n';
- return 3;
+ return PatternNo < 8 ? 2 : 3;
}
case Matcher::CheckAndImm: {
@@ -1267,7 +1304,7 @@ void llvm::EmitMatcherTable(Matcher *TheMatcher,
OS << "#endif\n\n";
BeginEmitFunction(OS, "void", "SelectCode(SDNode *N)", false/*AddOverride*/);
- MatcherTableEmitter MatcherEmitter(CGP);
+ MatcherTableEmitter MatcherEmitter(TheMatcher, CGP);
// First we size all the children of the three kinds of matchers that have
// them. This is done by sharing the code in EmitMatcher(), but we don't
diff --git a/llvm/utils/TableGen/ExegesisEmitter.cpp b/llvm/utils/TableGen/ExegesisEmitter.cpp
index 736f1220be14..d48c7f3a480f 100644
--- a/llvm/utils/TableGen/ExegesisEmitter.cpp
+++ b/llvm/utils/TableGen/ExegesisEmitter.cpp
@@ -81,6 +81,11 @@ collectPfmCounters(const RecordKeeper &Records) {
"duplicate ResourceName " + ResourceName);
AddPfmCounterName(IssueCounter);
}
+
+ for (const Record *ValidationCounter :
+ Def->getValueAsListOfDefs("ValidationCounters"))
+ AddPfmCounterName(ValidationCounter);
+
AddPfmCounterName(Def->getValueAsDef("CycleCounter"));
AddPfmCounterName(Def->getValueAsDef("UopsCounter"));
}
@@ -100,6 +105,17 @@ ExegesisEmitter::ExegesisEmitter(RecordKeeper &RK)
Target = std::string(Targets[0]->getName());
}
+struct ValidationCounterInfo {
+ int64_t EventNumber;
+ StringRef EventName;
+ unsigned PfmCounterID;
+};
+
+bool EventNumberLess(const ValidationCounterInfo &LHS,
+ const ValidationCounterInfo &RHS) {
+ return LHS.EventNumber < RHS.EventNumber;
+}
+
void ExegesisEmitter::emitPfmCountersInfo(const Record &Def,
unsigned &IssueCountersTableOffset,
raw_ostream &OS) const {
@@ -109,6 +125,31 @@ void ExegesisEmitter::emitPfmCountersInfo(const Record &Def,
Def.getValueAsDef("UopsCounter")->getValueAsString("Counter");
const size_t NumIssueCounters =
Def.getValueAsListOfDefs("IssueCounters").size();
+ const size_t NumValidationCounters =
+ Def.getValueAsListOfDefs("ValidationCounters").size();
+
+ // Emit Validation Counters Array
+ if (NumValidationCounters != 0) {
+ std::vector<ValidationCounterInfo> ValidationCounters;
+ ValidationCounters.reserve(NumValidationCounters);
+ for (const Record *ValidationCounter :
+ Def.getValueAsListOfDefs("ValidationCounters")) {
+ ValidationCounters.push_back(
+ {ValidationCounter->getValueAsDef("EventType")
+ ->getValueAsInt("EventNumber"),
+ ValidationCounter->getValueAsDef("EventType")->getName(),
+ getPfmCounterId(ValidationCounter->getValueAsString("Counter"))});
+ }
+ std::sort(ValidationCounters.begin(), ValidationCounters.end(),
+ EventNumberLess);
+ OS << "\nstatic const std::pair<ValidationEvent, const char*> " << Target
+ << Def.getName() << "ValidationCounters[] = {\n";
+ for (const ValidationCounterInfo &VCI : ValidationCounters) {
+ OS << " { " << VCI.EventName << ", " << Target << "PfmCounterNames["
+ << VCI.PfmCounterID << "]},\n";
+ }
+ OS << "};\n";
+ }
OS << "\nstatic const PfmCountersInfo " << Target << Def.getName()
<< " = {\n";
@@ -129,10 +170,17 @@ void ExegesisEmitter::emitPfmCountersInfo(const Record &Def,
// Issue Counters
if (NumIssueCounters == 0)
- OS << " nullptr, // No issue counters.\n 0\n";
+ OS << " nullptr, 0, // No issue counters\n";
else
OS << " " << Target << "PfmIssueCounters + " << IssueCountersTableOffset
- << ", " << NumIssueCounters << " // Issue counters.\n";
+ << ", " << NumIssueCounters << ", // Issue counters.\n";
+
+ // Validation Counters
+ if (NumValidationCounters == 0)
+ OS << " nullptr, 0 // No validation counters.\n";
+ else
+ OS << " " << Target << Def.getName() << "ValidationCounters, "
+ << NumValidationCounters << " // Validation counters.\n";
OS << "};\n";
IssueCountersTableOffset += NumIssueCounters;
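The reworked initializer tail pairs each counter list with its length, using `nullptr, 0` when a target defines none. The sketch below shows the shape of the emitted data for a target that does define validation counters; the struct is a simplified stand-in, not the real PfmCountersInfo layout:

#include <utility>

// Simplified stand-ins for the real exegesis types; the (pointer, count)
// pairing is the only point of this sketch.
enum ValidationEvent { InstructionRetired, L1DCacheLoadMiss };

struct PfmCountersInfoSketch {
  const std::pair<ValidationEvent, const char *> *ValidationCounters;
  unsigned NumValidationCounters;
};

static const std::pair<ValidationEvent, const char *>
    FooDefaultValidationCounters[] = {
        {InstructionRetired, "instructions"},
        {L1DCacheLoadMiss, "l1d_cache_miss"},
};

static const PfmCountersInfoSketch FooDefault = {
    FooDefaultValidationCounters, 2, // Validation counters.
};

int main() { return FooDefault.NumValidationCounters == 2 ? 0 : 1; }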
diff --git a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
index 348b3b3e0898..c092772386ec 100644
--- a/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
+++ b/llvm/utils/TableGen/GlobalISelCombinerEmitter.cpp
@@ -2318,7 +2318,7 @@ bool CombineRuleBuilder::emitInstructionApplyPattern(
M.actions_begin(), getLLTCodeGenOrTempType(Ty, M), TempRegID);
}
- DstMI.addRenderer<TempRegRenderer>(TempRegID);
+ DstMI.addRenderer<TempRegRenderer>(TempRegID, /*IsDef=*/true);
}
// Render MIFlags